diff --git a/LICENSE.txt b/LICENSE.txt index c02c70e..bcba007 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,515 +1,27 @@ - -CeCILL-B FREE SOFTWARE LICENSE AGREEMENT - - - Notice - -This Agreement is a Free Software license agreement that is the result -of discussions between its authors in order to ensure compliance with -the two main principles guiding its drafting: - - * firstly, compliance with the principles governing the distribution - of Free Software: access to source code, broad rights granted to - users, - * secondly, the election of a governing law, French law, with which - it is conformant, both as regards the law of torts and - intellectual property law, and the protection that it offers to - both authors and holders of the economic rights over software. - -The authors of the CeCILL-B (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre]) -license are: - -Commissariat à l'Energie Atomique - CEA, a public scientific, technical -and industrial research establishment, having its principal place of -business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France. - -Centre National de la Recherche Scientifique - CNRS, a public scientific -and technological establishment, having its principal place of business -at 3 rue Michel-Ange, 75794 Paris cedex 16, France. - -Institut National de Recherche en Informatique et en Automatique - -INRIA, a public scientific and technological establishment, having its -principal place of business at Domaine de Voluceau, Rocquencourt, BP -105, 78153 Le Chesnay cedex, France. - - - Preamble - -This Agreement is an open source software license intended to give users -significant freedom to modify and redistribute the software licensed -hereunder. - -The exercising of this freedom is conditional upon a strong obligation -of giving credits for everybody that distributes a software -incorporating a software ruled by the current license so as all -contributions to be properly identified and acknowledged. - -In consideration of access to the source code and the rights to copy, -modify and redistribute granted by the license, users are provided only -with a limited warranty and the software's author, the holder of the -economic rights, and the successive licensors only have limited liability. - -In this respect, the risks associated with loading, using, modifying -and/or developing or reproducing the software by the user are brought to -the user's attention, given its Free Software status, which may make it -complicated to use, with the result that its use is reserved for -developers and experienced professionals having in-depth computer -knowledge. Users are therefore encouraged to load and test the -suitability of the software as regards their requirements in conditions -enabling the security of their systems and/or data to be ensured and, -more generally, to use and operate it in the same conditions of -security. This Agreement may be freely reproduced and published, -provided it is not altered, and that no provisions are either added or -removed herefrom. - -This Agreement may apply to any or all software for which the holder of -the economic rights decides to submit the use thereof to its provisions. - - - Article 1 - DEFINITIONS - -For the purpose of this Agreement, when the following expressions -commence with a capital letter, they shall have the following meaning: - -Agreement: means this license agreement, and its possible subsequent -versions and annexes. - -Software: means the software in its Object Code and/or Source Code form -and, where applicable, its documentation, "as is" when the Licensee -accepts the Agreement. - -Initial Software: means the Software in its Source Code and possibly its -Object Code form and, where applicable, its documentation, "as is" when -it is first distributed under the terms and conditions of the Agreement. - -Modified Software: means the Software modified by at least one -Contribution. - -Source Code: means all the Software's instructions and program lines to -which access is required so as to modify the Software. - -Object Code: means the binary files originating from the compilation of -the Source Code. - -Holder: means the holder(s) of the economic rights over the Initial -Software. - -Licensee: means the Software user(s) having accepted the Agreement. - -Contributor: means a Licensee having made at least one Contribution. - -Licensor: means the Holder, or any other individual or legal entity, who -distributes the Software under the Agreement. - -Contribution: means any or all modifications, corrections, translations, -adaptations and/or new functions integrated into the Software by any or -all Contributors, as well as any or all Internal Modules. - -Module: means a set of sources files including their documentation that -enables supplementary functions or services in addition to those offered -by the Software. - -External Module: means any or all Modules, not derived from the -Software, so that this Module and the Software run in separate address -spaces, with one calling the other when they are run. - -Internal Module: means any or all Module, connected to the Software so -that they both execute in the same address space. - -Parties: mean both the Licensee and the Licensor. - -These expressions may be used both in singular and plural form. - - - Article 2 - PURPOSE - -The purpose of the Agreement is the grant by the Licensor to the -Licensee of a non-exclusive, transferable and worldwide license for the -Software as set forth in Article 5 hereinafter for the whole term of the -protection granted by the rights over said Software. - - - Article 3 - ACCEPTANCE - -3.1 The Licensee shall be deemed as having accepted the terms and -conditions of this Agreement upon the occurrence of the first of the -following events: - - * (i) loading the Software by any or all means, notably, by - downloading from a remote server, or by loading from a physical - medium; - * (ii) the first time the Licensee exercises any of the rights - granted hereunder. - -3.2 One copy of the Agreement, containing a notice relating to the -characteristics of the Software, to the limited warranty, and to the -fact that its use is restricted to experienced users has been provided -to the Licensee prior to its acceptance as set forth in Article 3.1 -hereinabove, and the Licensee hereby acknowledges that it has read and -understood it. - - - Article 4 - EFFECTIVE DATE AND TERM - - - 4.1 EFFECTIVE DATE - -The Agreement shall become effective on the date when it is accepted by -the Licensee as set forth in Article 3.1. - - - 4.2 TERM - -The Agreement shall remain in force for the entire legal term of -protection of the economic rights over the Software. - - - Article 5 - SCOPE OF RIGHTS GRANTED - -The Licensor hereby grants to the Licensee, who accepts, the following -rights over the Software for any or all use, and for the term of the -Agreement, on the basis of the terms and conditions set forth hereinafter. - -Besides, if the Licensor owns or comes to own one or more patents -protecting all or part of the functions of the Software or of its -components, the Licensor undertakes not to enforce the rights granted by -these patents against successive Licensees using, exploiting or -modifying the Software. If these patents are transferred, the Licensor -undertakes to have the transferees subscribe to the obligations set -forth in this paragraph. - - - 5.1 RIGHT OF USE - -The Licensee is authorized to use the Software, without any limitation -as to its fields of application, with it being hereinafter specified -that this comprises: - - 1. permanent or temporary reproduction of all or part of the Software - by any or all means and in any or all form. - - 2. loading, displaying, running, or storing the Software on any or - all medium. - - 3. entitlement to observe, study or test its operation so as to - determine the ideas and principles behind any or all constituent - elements of said Software. This shall apply when the Licensee - carries out any or all loading, displaying, running, transmission - or storage operation as regards the Software, that it is entitled - to carry out hereunder. - - - 5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS - -The right to make Contributions includes the right to translate, adapt, -arrange, or make any or all modifications to the Software, and the right -to reproduce the resulting software. - -The Licensee is authorized to make any or all Contributions to the -Software provided that it includes an explicit notice that it is the -author of said Contribution and indicates the date of the creation thereof. - - - 5.3 RIGHT OF DISTRIBUTION - -In particular, the right of distribution includes the right to publish, -transmit and communicate the Software to the general public on any or -all medium, and by any or all means, and the right to market, either in -consideration of a fee, or free of charge, one or more copies of the -Software by any means. - -The Licensee is further authorized to distribute copies of the modified -or unmodified Software to third parties according to the terms and -conditions set forth hereinafter. - - - 5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION - -The Licensee is authorized to distribute true copies of the Software in -Source Code or Object Code form, provided that said distribution -complies with all the provisions of the Agreement and is accompanied by: - - 1. a copy of the Agreement, - - 2. a notice relating to the limitation of both the Licensor's - warranty and liability as set forth in Articles 8 and 9, - -and that, in the event that only the Object Code of the Software is -redistributed, the Licensee allows effective access to the full Source -Code of the Software at a minimum during the entire period of its -distribution of the Software, it being understood that the additional -cost of acquiring the Source Code shall not exceed the cost of -transferring the data. - - - 5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE - -If the Licensee makes any Contribution to the Software, the resulting -Modified Software may be distributed under a license agreement other -than this Agreement subject to compliance with the provisions of Article -5.3.4. - - - 5.3.3 DISTRIBUTION OF EXTERNAL MODULES - -When the Licensee has developed an External Module, the terms and -conditions of this Agreement do not apply to said External Module, that -may be distributed under a separate license agreement. - - - 5.3.4 CREDITS - -Any Licensee who may distribute a Modified Software hereby expressly -agrees to: - - 1. indicate in the related documentation that it is based on the - Software licensed hereunder, and reproduce the intellectual - property notice for the Software, - - 2. ensure that written indications of the Software intended use, - intellectual property notice and license hereunder are included in - easily accessible format from the Modified Software interface, - - 3. mention, on a freely accessible website describing the Modified - Software, at least throughout the distribution term thereof, that - it is based on the Software licensed hereunder, and reproduce the - Software intellectual property notice, - - 4. where it is distributed to a third party that may distribute a - Modified Software without having to make its source code - available, make its best efforts to ensure that said third party - agrees to comply with the obligations set forth in this Article . - -If the Software, whether or not modified, is distributed with an -External Module designed for use in connection with the Software, the -Licensee shall submit said External Module to the foregoing obligations. - - - 5.3.5 COMPATIBILITY WITH THE CeCILL AND CeCILL-C LICENSES - -Where a Modified Software contains a Contribution subject to the CeCILL -license, the provisions set forth in Article 5.3.4 shall be optional. - -A Modified Software may be distributed under the CeCILL-C license. In -such a case the provisions set forth in Article 5.3.4 shall be optional. - - - Article 6 - INTELLECTUAL PROPERTY - - - 6.1 OVER THE INITIAL SOFTWARE - -The Holder owns the economic rights over the Initial Software. Any or -all use of the Initial Software is subject to compliance with the terms -and conditions under which the Holder has elected to distribute its work -and no one shall be entitled to modify the terms and conditions for the -distribution of said Initial Software. - -The Holder undertakes that the Initial Software will remain ruled at -least by this Agreement, for the duration set forth in Article 4.2. - - - 6.2 OVER THE CONTRIBUTIONS - -The Licensee who develops a Contribution is the owner of the -intellectual property rights over this Contribution as defined by -applicable law. - - - 6.3 OVER THE EXTERNAL MODULES - -The Licensee who develops an External Module is the owner of the -intellectual property rights over this External Module as defined by -applicable law and is free to choose the type of agreement that shall -govern its distribution. - - - 6.4 JOINT PROVISIONS - -The Licensee expressly undertakes: - - 1. not to remove, or modify, in any manner, the intellectual property - notices attached to the Software; - - 2. to reproduce said notices, in an identical manner, in the copies - of the Software modified or not. - -The Licensee undertakes not to directly or indirectly infringe the -intellectual property rights of the Holder and/or Contributors on the -Software and to take, where applicable, vis-à-vis its staff, any and all -measures required to ensure respect of said intellectual property rights -of the Holder and/or Contributors. - - - Article 7 - RELATED SERVICES - -7.1 Under no circumstances shall the Agreement oblige the Licensor to -provide technical assistance or maintenance services for the Software. - -However, the Licensor is entitled to offer this type of services. The -terms and conditions of such technical assistance, and/or such -maintenance, shall be set forth in a separate instrument. Only the -Licensor offering said maintenance and/or technical assistance services -shall incur liability therefor. - -7.2 Similarly, any Licensor is entitled to offer to its licensees, under -its sole responsibility, a warranty, that shall only be binding upon -itself, for the redistribution of the Software and/or the Modified -Software, under terms and conditions that it is free to decide. Said -warranty, and the financial terms and conditions of its application, -shall be subject of a separate instrument executed between the Licensor -and the Licensee. - - - Article 8 - LIABILITY - -8.1 Subject to the provisions of Article 8.2, the Licensee shall be -entitled to claim compensation for any direct loss it may have suffered -from the Software as a result of a fault on the part of the relevant -Licensor, subject to providing evidence thereof. - -8.2 The Licensor's liability is limited to the commitments made under -this Agreement and shall not be incurred as a result of in particular: -(i) loss due the Licensee's total or partial failure to fulfill its -obligations, (ii) direct or consequential loss that is suffered by the -Licensee due to the use or performance of the Software, and (iii) more -generally, any consequential loss. In particular the Parties expressly -agree that any or all pecuniary or business loss (i.e. loss of data, -loss of profits, operating loss, loss of customers or orders, -opportunity cost, any disturbance to business activities) or any or all -legal proceedings instituted against the Licensee by a third party, -shall constitute consequential loss and shall not provide entitlement to -any or all compensation from the Licensor. - - - Article 9 - WARRANTY - -9.1 The Licensee acknowledges that the scientific and technical -state-of-the-art when the Software was distributed did not enable all -possible uses to be tested and verified, nor for the presence of -possible defects to be detected. In this respect, the Licensee's -attention has been drawn to the risks associated with loading, using, -modifying and/or developing and reproducing the Software which are -reserved for experienced users. - -The Licensee shall be responsible for verifying, by any or all means, -the suitability of the product for its requirements, its good working -order, and for ensuring that it shall not cause damage to either persons -or properties. - -9.2 The Licensor hereby represents, in good faith, that it is entitled -to grant all the rights over the Software (including in particular the -rights set forth in Article 5). - -9.3 The Licensee acknowledges that the Software is supplied "as is" by -the Licensor without any other express or tacit warranty, other than -that provided for in Article 9.2 and, in particular, without any warranty -as to its commercial value, its secured, safe, innovative or relevant -nature. - -Specifically, the Licensor does not warrant that the Software is free -from any error, that it will operate without interruption, that it will -be compatible with the Licensee's own equipment and software -configuration, nor that it will meet the Licensee's requirements. - -9.4 The Licensor does not either expressly or tacitly warrant that the -Software does not infringe any third party intellectual property right -relating to a patent, software or any other property right. Therefore, -the Licensor disclaims any and all liability towards the Licensee -arising out of any or all proceedings for infringement that may be -instituted in respect of the use, modification and redistribution of the -Software. Nevertheless, should such proceedings be instituted against -the Licensee, the Licensor shall provide it with technical and legal -assistance for its defense. Such technical and legal assistance shall be -decided on a case-by-case basis between the relevant Licensor and the -Licensee pursuant to a memorandum of understanding. The Licensor -disclaims any and all liability as regards the Licensee's use of the -name of the Software. No warranty is given as regards the existence of -prior rights over the name of the Software or as regards the existence -of a trademark. - - - Article 10 - TERMINATION - -10.1 In the event of a breach by the Licensee of its obligations -hereunder, the Licensor may automatically terminate this Agreement -thirty (30) days after notice has been sent to the Licensee and has -remained ineffective. - -10.2 A Licensee whose Agreement is terminated shall no longer be -authorized to use, modify or distribute the Software. However, any -licenses that it may have granted prior to termination of the Agreement -shall remain valid subject to their having been granted in compliance -with the terms and conditions hereof. - - - Article 11 - MISCELLANEOUS - - - 11.1 EXCUSABLE EVENTS - -Neither Party shall be liable for any or all delay, or failure to -perform the Agreement, that may be attributable to an event of force -majeure, an act of God or an outside cause, such as defective -functioning or interruptions of the electricity or telecommunications -networks, network paralysis following a virus attack, intervention by -government authorities, natural disasters, water damage, earthquakes, -fire, explosions, strikes and labor unrest, war, etc. - -11.2 Any failure by either Party, on one or more occasions, to invoke -one or more of the provisions hereof, shall under no circumstances be -interpreted as being a waiver by the interested Party of its right to -invoke said provision(s) subsequently. - -11.3 The Agreement cancels and replaces any or all previous agreements, -whether written or oral, between the Parties and having the same -purpose, and constitutes the entirety of the agreement between said -Parties concerning said purpose. No supplement or modification to the -terms and conditions hereof shall be effective as between the Parties -unless it is made in writing and signed by their duly authorized -representatives. - -11.4 In the event that one or more of the provisions hereof were to -conflict with a current or future applicable act or legislative text, -said act or legislative text shall prevail, and the Parties shall make -the necessary amendments so as to comply with said act or legislative -text. All other provisions shall remain effective. Similarly, invalidity -of a provision of the Agreement, for any reason whatsoever, shall not -cause the Agreement as a whole to be invalid. - - - 11.5 LANGUAGE - -The Agreement is drafted in both French and English and both versions -are deemed authentic. - - - Article 12 - NEW VERSIONS OF THE AGREEMENT - -12.1 Any person is authorized to duplicate and distribute copies of this -Agreement. - -12.2 So as to ensure coherence, the wording of this Agreement is -protected and may only be modified by the authors of the License, who -reserve the right to periodically publish updates or new versions of the -Agreement, each with a separate number. These subsequent versions may -address new issues encountered by Free Software. - -12.3 Any Software distributed under a given version of the Agreement may -only be subsequently distributed under the same version of the Agreement -or a subsequent version. - - - Article 13 - GOVERNING LAW AND JURISDICTION - -13.1 The Agreement is governed by French law. The Parties agree to -endeavor to seek an amicable solution to any disagreements or disputes -that may arise during the performance of the Agreement. - -13.2 Failing an amicable solution within two (2) months as from their -occurrence, and unless emergency proceedings are necessary, the -disagreements or disputes shall be referred to the Paris Courts having -jurisdiction, by the more diligent Party. - - -Version 1.0 dated 2006-09-05. +Copyright (c) 2019, Transonic developers and Pierre Augier +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/kuramoto_sivashinksy/3-fast-as-C.ipynb b/kuramoto_sivashinksy/3-fast-as-C.ipynb new file mode 100644 index 0000000..fdc9835 --- /dev/null +++ b/kuramoto_sivashinksy/3-fast-as-C.ipynb @@ -0,0 +1,1270 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Julia is as fast as compiled C, Fortran" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Plots.GRBackend()" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "using Plots; gr()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Performance benchmarks: common code patterns\n", + "\n", + "![microbenchmark timing plot](figs/benchmarks.png)|" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Geometric means of benchmark timings, normalized so C=1\n", + "\n", + "| cputime | language |\n", + "|---------|----------|\n", + "| 1.000\t| C |\n", + "| 1.053\t| Julia |\n", + "| 1.085\t| LuaJIT | \n", + "| 1.501\t| Fortran | \n", + "| 1.514\t| Go |\n", + "| 3.360\t| Java |\n", + "| 3.458\t| JavaScript |\n", + "| 14.093 | \tMathematica | \n", + "| 26.142 |\tMatlab |\n", + "| 31.971 | \tPython |\n", + "| 69.574 | \tR\n", + "| 650.622 |\tOctave }\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cputime\tlang\n", + "1.000\tC\n", + "1.053\tJulia\n", + "1.085\tLuaJIT\n", + "1.501\tFortran\n", + "1.514\tGo\n", + "3.360\tJava\n", + "3.458\tJavaScript\n", + "14.093\tMathematica\n", + "26.142\tMatlab\n", + "31.971\tPython\n", + "69.574\tR\n", + "650.622\tOctave\n" + ] + } + ], + "source": [ + "1.000\tC\n", + "1.053\tJulia\n", + "1.085\tLuaJIT\n", + "1.501\tFortran\n", + "1.514\tGo\n", + "3.360\tJava\n", + "3.458\tJavaScript\n", + "14.093\tMathematica\n", + "26.142\tMatlab\n", + "31.971\tPython\n", + "69.574\tR\n", + "650.622\tOctave\n", + "listgeomean(cputime,lang)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This benchmark data is for julia-0.7.0-DEV, gcc-4.8.5, gfortran-4.8.5, SciLua-1.0.0-$\\beta2$, Go 1.7.5, Java 1.8.0, JavaScript V8-4.5, Mathematica 11.1.1, Anaconda Python 3.5 with Numpy, Matlab R2017a, R 3.3.1, and Octave 4.0.3, running serially on an Intel(R) Core(TM) i7-3960X 3.30GHz CPU with 64GB of 1600MHz DDR3 RAM, running openSUSE LEAP 42.2 Linux." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Performance benchmark: Kuramoto-Sivashinksy eqn\n", + "\n", + "The Kuramoto-Sivashinsky (KS) equation is a nonlinear time-evolving partial differential equation (PDE) on a 1d spatial domain.\n", + "\n", + "\\begin{equation*}\n", + "u_t = - u_{xx} - u_{xxxx} - u u_x\n", + "\\end{equation*}\n", + "\n", + "where $x$ is space, $t$ is time, and subscripts indicate differentiation. We assume a spatial domain $x \\in [0, L_x]$ with periodic boundary conditions and initial condition $u(x,0) = u_0(x)$. \n", + "\n", + "A simulation for $u_0 = \\cos(x) + 0.1 \\sin x/8 + 0.01 \\cos x/32$ and $L_x = 64\\pi$.\n", + "\n", + "![KS eqn u(x,t) plot](figs/ksdynamics.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mathematics of numerical simulation: KS-CNAB2 algorithm\n", + "\n", + "Start from the Kuramoto-Sivashinsky equation $[0,L]$ with periodic boundary conditions\n", + "\n", + "\\begin{equation*}\n", + "u_t = - u_{xx} - u_{xxxx} - u u_{x}\n", + "\\end{equation*}\n", + "\n", + "on the domain $[0,L_x]$ with periodic boundary conditions and initial condition $u(x,0) = u_0(x)$. We will use a finite Fourier expansion to discretize space and finite-differencing to discretize time, specifically the 2nd-order rank-Nicolson/Adams-Bashforth (CNAB2) timestepping formula. CNAB2 is low-order but straightforward to describe and easy to implement for this simple benchmark.\n", + "\n", + "Write the KS equation as \n", + "\n", + "\\begin{equation*}\n", + "u_t = Lu + N(u)\n", + "\\end{equation*}\n", + "\n", + "where $Lu = - u_{xx} - u_{xxxx}$ is the linear terms and $N(u) = -u u_{x}$ is the nonlinear term. In practice we'll calculate the $N(u)$ in the equivalent form $N(u) = - 1/2 \\, d/dx \\, u^2$. \n", + "\n", + "Discretize time by letting $u^n(x) = u(x, n\\Delta t)$ for some small $\\Delta t$. The CNAB2 timestepping forumale approximates $u_t = Lu + N(u)$ at time $t = (n+1/2) \\, dt$ as \n", + "\n", + "\\begin{equation*}\n", + "\\frac{u^{n+1} - u^n}{\\Delta t} = L\\left(u^{n+1} + u^n\\right) + \\frac{3}{2} N(u^n) - \\frac{1}{2} N(u^{n-1})\n", + "\\end{equation*}\n", + "\n", + "\n", + "Put the unknown future $u^{n+1}$'s on the left-hand side of the equation and the present $u^{n}$ and past $u^{n+1}$ on the right.\n", + "\n", + "\\begin{equation*}\n", + "\\left(I - \\frac{\\Delta t}{2} L \\right) u^{n+1} = \\left(I + \\frac{\\Delta t}{2}L \\right) u^{n} + \\frac{3 \\Delta t}{2} N(u^n) - \\frac{\\Delta t}{2} N(u^{n-1})\n", + "\\end{equation*}\n", + "Note that the linear operator $L$ applies to the unknown $u^{n+1}$ on the LHS, but that the nonlinear operator $N$ applies only to the knowns $u^n$ and $u^{n-1}$ on the RHS. This is an *implicit* treatment of the linear terms, which keeps the algorithm stable for large time steps, and an *explicit* treament of the nonlinear term, which makes the timestepping equation linear in the unknown $u^{n+1}$.\n", + "\n", + "Now we discretize space with a finite Fourier expansion, so that $u$ now represents a vector of Fourier coefficients and $L$ turns into matrix (and a diagonal matrix, since Fourier modes are eigenfunctions of the linear operator). Let matrix $A = (I - \\Delta t/2 \\; L)$, matrix $B = (I + \\Delta t/2 \\; L)$, and let vector $N^n$ be the Fourier transform of a collocation calculation of $N(u^n)$. That is, $N^n$ is the Fourier transform of $- u u_x = - 1/2 \\, d/dx \\, u^2$ calculated at $N_x$ uniformly spaced gridpoints on the domain $[0, L_x]$. \n", + "\n", + "With the spatial discretization, then the CNAB2 timestepping formula becomes \n", + "\n", + "\\begin{equation*}\n", + "A \\, u^{n+1} = B \\, u^n + \\frac{3 \\Delta t}{2} N^n - \\frac{\\Delta t}{2}N^{n-1}\n", + "\\end{equation*}\n", + "\n", + "This is a simple $Ax=b$ linear algebra problem whose iteration approximates the time-evolution of the Kuramoto-Sivashinksy PDE. With the Fourier discretization, $A$ and $B$ are diagonal. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## KS-CNAB2 codes\n", + "\n", + "We implemented the KS-CNAB2 numerical integration algorithm in six languages: Python, Matlab, Julia, C, C++, and Fortran-90. All languages used the same FFTW library for Fourier transforms. \n", + "\n", + " * [ksbenchmark.py](ks-codes/ksbenchmark.py), Python\n", + " * [ksbenchmark.m](ks-codes/ksbenchmark.m), Matlab\n", + " * [ksbenchmark.jl](ks-codes/ksbenchmark.jl), Julia \n", + " * [ksbenchmark.c](ks-codes/ksbenchmark.c), C\n", + " * [ksbenchmark.cpp](ks-codes/ksbenchmark.cpp), C++ \n", + " * [ksbenchmark.f90](ks-codes/ksbenchmark.f90), Fortran\n", + "\n", + "\n", + "### Matlab\n", + "\n", + "The Matlab implementation is about 30 lines of code, with plotting and excluding whitespace and comments.\n", + "\n", + "```\n", + "function t,U = ksintegrate(u, Lx, dt, Nt, nsave)\n", + " Nx = length(u); % number of gridpoints\n", + " kx = [0:Nx/2-1 0 -Nx/2+1:-1]; % integer wavenumbers: exp(2*pi*kx*x/L)\n", + " alpha = 2*pi*kx/Lx; % real wavenumbers: exp(alpha*x)\n", + " D = i*alpha; % D = d/dx operator in Fourier space\n", + " L = alpha.^2 - alpha.^4; % linear operator -D^2 - D^3 in Fourier space\n", + " G = -0.5*D; % -1/2 D operator in Fourier space\n", + "\n", + " Nsave = round(Nt/nsave) + 1 % number of saved time steps\n", + " t = (0:Nsave)*(dt*nsave) % t timesteps\n", + " U = zeros(Nsave, Nx) % matrix of u(xⱼ, tᵢ) values\n", + " U(1,:) = u % assign initial condition to U\n", + " s = 2 % counter for saved data\n", + " \n", + " % some convenience variables\n", + " dt2 = dt/2;\n", + " dt32 = 3*dt/2;\n", + " A_inv = (ones(1,Nx) - dt2*L).^(-1);\n", + " B = ones(1,Nx) + dt2*L;\n", + "\n", + " % compute nonlinear term Nn = -u u_x\n", + " Nn = G.*fft(u.*u); % Nn = -1/2 d/dx u^2 = -u u_x\n", + " Nn1 = Nn; % Nn1 = Nn at first time step\n", + " u = fft(u); % transform u physical -> spectral\n", + " \n", + " % timestepping loop \n", + " for n = 1:Nt\n", + " \n", + " Nn1 = Nn; % shift nonlinear term in time\n", + " Nn = G.*fft(real(ifft(u)).^2); % compute Nn = N(u) = -1/2 d/dx u^2 = -u u_x\n", + "\n", + " % time-stepping eqn: Matlab generates six temp vectors to evaluate\n", + " u = A_inv .* (B .* u + dt32*Nn - dt2*Nn1);\n", + "\n", + " if mod(n, nsave) == 0\n", + " U(s,:) = real(ifft(u));\n", + " s = s+1; \n", + " end\n", + "\n", + "end\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Julia \n", + "\n", + "The Julia implementation is 38 lines of code, with plotting and excluding whitespace and comments. The 8 extra lines of code over Matlab are for in-place Fourier transforms." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ksintegrate (generic function with 1 method)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function ksintegrate(u0, Lx, dt, Nt, nsave);\n", + " u = (1+0im)*u0 # force u to be complex\n", + " Nx = length(u) # number of gridpoints\n", + " kx = vcat(0:Nx/2-1, 0:0, -Nx/2+1:-1)# integer wavenumbers: exp(2*pi*kx*x/L)\n", + " alpha = 2*pi*kx/Lx # real wavenumbers: exp(alpha*x)\n", + " D = 1im*alpha # spectral D = d/dx operator \n", + " L = alpha.^2 - alpha.^4 # spectral L = -D^2 - D^4 operator\n", + " G = -0.5*D # spectral -1/2 D operator\n", + " \n", + " Nsave = div(Nt, nsave)+1 # number of saved time steps, including t=0\n", + " t = (0:Nsave)*(dt*nsave) # t timesteps\n", + " U = zeros(Nsave, Nx) # matrix of u(xⱼ, tᵢ) values\n", + " U[1,:] = u # assign initial condition to U\n", + " s = 2 # counter for saved data\n", + " \n", + " # convenience variables\n", + " dt2 = dt/2\n", + " dt32 = 3*dt/2\n", + " A_inv = (ones(Nx) - dt2*L).^(-1)\n", + " B = ones(Nx) + dt2*L\n", + " \n", + " # compute in-place FFTW plans\n", + " FFT! = plan_fft!(u, flags=FFTW.ESTIMATE)\n", + " IFFT! = plan_ifft!(u, flags=FFTW.ESTIMATE)\n", + "\n", + " # compute nonlinear term Nn == -u u_x \n", + " Nn = G.*fft(u.^2); # Nn == -1/2 d/dx (u^2) = -u u_x\n", + " Nn1 = copy(Nn); # Nn1 = Nn at first time step\n", + " FFT!*u;\n", + " \n", + " # timestepping loop\n", + " for n = 1:Nt\n", + "\n", + " Nn1 .= Nn # shift nonlinear term in time\n", + " Nn .= u # put u into Nn in prep for comp of nonlinear term\n", + " \n", + " IFFT!*Nn; # transform Nn to gridpt values, in place\n", + " Nn .= Nn.*Nn; # collocation calculation of u^2\n", + " FFT!*Nn; # transform Nn back to spectral coeffs, in place\n", + "\n", + " Nn .= G.*Nn; # compute Nn = N(u) = -1/2 d/dx u^2 = -u u_x\n", + "\n", + " # loop fusion! Julia translates this line into a single for-loop on\n", + " # u[i] = A_inv[i] * (B[i]*u[i] + dt32*Nn[i] - dt2*Nn1[i]; \n", + " # no temporary vectors! \n", + "\n", + " u .= A_inv .* (B .* u .+ dt32.*Nn .- dt2.*Nn1); \n", + " \n", + " if mod(n, nsave) == 0\n", + " U[s,:] = real(ifft(u))\n", + " s += 1 \n", + " end\n", + " end\n", + " \n", + " t,U\n", + "end" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Execute the Julia code" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "LoadError", + "evalue": "\u001b[91mUndefVarError: heatmap not defined\u001b[39m", + "output_type": "error", + "traceback": [ + "\u001b[91mUndefVarError: heatmap not defined\u001b[39m", + "" + ] + } + ], + "source": [ + "# set parameters\n", + "Lx = 64*pi\n", + "Nx = 1024\n", + "dt = 1/16\n", + "nsave = 8\n", + "Nt = 3200\n", + "\n", + "# set initial condition and run simulation\n", + "x = Lx*(0:Nx-1)/Nx\n", + "u = cos.(x) + 0.1*sin.(x/8) + 0.01*cos.((2*pi/Lx)*x)\n", + "t,U = ksintegrate(u, Lx, dt, Nt, nsave);\n", + "\n", + "# plot results\n", + "heatmap(x,t,U, xlim=(x[1], x[end]), ylim=(t[1], t[end]), xlabel=\"x\", ylabel=\"t\", title=\"u(x,t)\", fillcolor=:bluesreds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Benchmark results: execution time versus number of gridpoints $N_x$" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "10^1\n", + "\n", + "\n", + "10^2\n", + "\n", + "\n", + "10^3\n", + "\n", + "\n", + "10^4\n", + "\n", + "\n", + "10^5\n", + "\n", + "\n", + "10^-3\n", + "\n", + "\n", + "10^-2\n", + "\n", + "\n", + "10^-1\n", + "\n", + "\n", + "10^0\n", + "\n", + "\n", + "10^1\n", + "\n", + "\n", + "10^2\n", + "\n", + "\n", + "Nx\n", + "\n", + "\n", + "cpu time\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Python\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Matlab\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Julia\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Julia unrolled\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "C\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "C++\n", + "\n", + "\n", + "\n", + "Fortran\n", + "\n", + "\n", + "\n", + "Nx log Nx\n", + "\n", + "\n" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "makescalingplot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Expectation of $N_x \\log N_X$ scaling**. Execution time for this algorithm should ideally be dominated by the $N_x \\log N_x$ cost of the FFTs. In the above plot, all the codes do appear to scale as $N_x \\log N_x$ at large $N_x$ with different prefactors. \n", + "\n", + "**Two Julia codes.** \n", + "\n", + " * **Julia** code is nearly a line-by-line translation of the Matlab code, but it eliminates temporary vectors in the inner loop by using in-place FFTs and julia-0.6's loop fusion capability.\n", + " \n", + " * **Julia unrolled** unrolls all the vector operations into explicit for loops. \n", + " \n", + "\n", + "**Julia, C, C++, Fortran results are practically identical.** The two Julia codes, Fortran, C, and C++ beat naive Python and Matlab handily. Julia is close to C and C++ (factor of 1.06), and Julia unrolled is close to Fortran (factor of 1.03). Execution times of Julia, Julia unrolled, C, C++, and Fortran are all pretty close, about a 15% spread. The benchmarks were averaged over thousands of runs for $N_x = 32$ scaling down to 8 runs for $N_x = 2^{17}$. \n", + "\n", + "\n", + "**CPU, OS, and compiler:** All benchmarks were run single-threaded on a six-core Intel Core i7-3960X CPU @ 3.30GHz with 64 GB memory running openSUSE Leap 42.2. C and C++ were compiled with clang 3.8.0, Fortran with gfortran 4.8.3, Julia was julia-0.7-DEV, and all used optimization ``-O3``. For more details see [benchmark-data/cputime.asc](benchmark-data/cputime.asc). " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Benchmark results: execution time versus line count" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "20\n", + "\n", + "\n", + "40\n", + "\n", + "\n", + "60\n", + "\n", + "\n", + "80\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "1\n", + "\n", + "\n", + "2\n", + "\n", + "\n", + "3\n", + "\n", + "\n", + "lines of code\n", + "\n", + "\n", + "cpu time, C = 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Python\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Matlab\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Julia\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Julia unrolled\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Fortran\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "C++\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "C\n", + "\n", + "\n" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "makelinecountplot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Julia clearly hits the sweet spot of low execution time and low line count. The extra lines in Julia over Matlab are for in-place FFTs. These linecounts exclude whitespace, comments, and plotting. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 0.6.4", + "language": "julia", + "name": "julia-0.6" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "0.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/kuramoto_sivashinksy/codes/count_heads.jl b/kuramoto_sivashinksy/codes/count_heads.jl new file mode 100644 index 0000000..3e95ff9 --- /dev/null +++ b/kuramoto_sivashinksy/codes/count_heads.jl @@ -0,0 +1,7 @@ +function count_heads(n) + c::Int = 0 + for i=1:n + c += rand(Bool) + end + c +end diff --git a/kuramoto_sivashinksy/codes/f.cpp b/kuramoto_sivashinksy/codes/f.cpp new file mode 100644 index 0000000..f72baa6 --- /dev/null +++ b/kuramoto_sivashinksy/codes/f.cpp @@ -0,0 +1,4 @@ +double f(double x) { + return 4*x*(1-x); +} + diff --git a/kuramoto_sivashinksy/codes/fN.cpp b/kuramoto_sivashinksy/codes/fN.cpp new file mode 100644 index 0000000..0579a2d --- /dev/null +++ b/kuramoto_sivashinksy/codes/fN.cpp @@ -0,0 +1,26 @@ +#include +#include +#include +#include + +using namespace std; + +inline double f(double x) { + return 4*x*(1-x); +} + +int main(int argc, char* argv[]) { + double x = argc > 1 ? atof(argv[1]) : 0.0; + + double t0 = clock(); + for (int n=0; n<1000000; ++n) + x = f(x); + double t1 = clock(); + + cout << "t = " << (t1-t0)/CLOCKS_PER_SEC << " seconds" << endl; + cout << setprecision(17); + cout << "x = " << x << endl; + + return 0; +} + diff --git a/kuramoto_sivashinksy/codes/ks-r2c.f90 b/kuramoto_sivashinksy/codes/ks-r2c.f90 new file mode 100644 index 0000000..69a4570 --- /dev/null +++ b/kuramoto_sivashinksy/codes/ks-r2c.f90 @@ -0,0 +1,108 @@ + module KS + implicit none + save + integer, parameter :: Nx = 8192 + double precision, parameter :: dt = 1d0/16d0 + double precision, parameter :: T = 200d0 + double precision, parameter :: pi = 3.14159265358979323846d0 + double precision, parameter :: Lx = (pi/16d0)*Nx + end module KS + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + program main + use KS + implicit none + integer :: i,Nt,r, Nruns=5, skip=1 + double precision :: x(0:Nx-1), u(0:Nx-1) + real :: tstart, tend, avgtime + + Nt = floor(T/dt) + x = Lx * (/(i,i=0,Nx-1)/) / dble(Nx) + u = cos(x) + 0.2d0*sin(x/8d0) + 0.01d0*cos(x/16d0) + + avgtime = 0. + do r = 1, Nruns + call cpu_time(tstart) + call ksintegrate(Nt, u) + call cpu_time(tend) + if(r>skip) avgtime = avgtime + tend-tstart + end do + avgtime = avgtime/(Nruns-skip) + print*, 'avgtime (seconds) = ', avgtime + + print*, 'norm(u) = ', norm(u) + + open(10, status='unknown', file='u.dat') + write(10,'(2e20.12)') (x(i),u(i), i=0,Nx-1) + close(10) + + end program main + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + double precision function norm(us,Nx) + double complex, intent(in) :: us(0:Nx-1) + norm = sqrt(sum(abs(us))/Nx) + end function norm + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + subroutine ksintegrate(Nt, u) + use KS + implicit none + integer, intent(in) :: Nt + double precision, intent(inout) :: u(0:Nx-1) + double precision :: dt2, dt32, Nx_inv + double precision :: A(0:Nx-1), B(0:Nx-1), L(0:Nx-1), kx(0:Nx-1) + double complex :: alpha(0:Nx-1), G(0:Nx-1), Nn(0:Nx-1), Nn1(0:Nx-1) + double precision, save :: u_(0:Nx-1), uu(0:Nx-1) + double complex, save :: us(0:Nx-1), uus(0:Nx-1) + logical, save :: planned=.false. + integer*8 :: plan_u2us, plan_us2u, plan_uu2uus, plan_uus2uu + integer :: n, fftw_patient=32, fftw_estimate=64 + + if(.not.planned) then + call dfftw_plan_dft_r2c_1d(plan_u2us, Nx, u_, us, fftw_estimate) + call dfftw_plan_dft_c2r_1d(plan_us2u, Nx, us, u_, fftw_estimate) + call dfftw_plan_dft_r2c_1d(plan_uu2uus, Nx, uu, uus, fftw_estimate) + call dfftw_plan_dft_c2r_1d(plan_uus2uu, Nx, uus, uu, fftw_estimate) + planned = .true. + end if + + do n = 0, Nx/2-1 + kx(n) = n + end do + kx(Nx/2) = 0d0 + do n = Nx/2+1, Nx-1 + kx(n) = -Nx + n; + end do + alpha = (2d0*pi/Lx)*kx + L = alpha**2 - alpha**4 + G = -0.5d0*dcmplx(0d0,1d0)*alpha + + Nx_inv = 1d0/Nx + dt2 = dt/2d0 + dt32 = 3d0*dt/2d0 + A = 1d0 + dt2*L + B = 1d0/(1d0 - dt2*L) + + uu = u*u + call dfftw_execute(plan_uu2uus) + Nn = G*uus + Nn1 = Nn + + u_ = u + call dfftw_execute(plan_u2us) + + do n = 1, Nt + Nn1 = Nn + call dfftw_execute(plan_us2u) + u_ = u_ * Nx_inv + uu = u_*u_ + call dfftw_execute(plan_uu2uus) + Nn = G*uus + us = B * (A*us + dt32*Nn - dt2*Nn1) + end do + + call dfftw_execute(plan_us2u) + u = u_ * Nx_inv + + end subroutine ksintegrate diff --git a/kuramoto_sivashinksy/codes/ks.mod b/kuramoto_sivashinksy/codes/ks.mod new file mode 100644 index 0000000..ad2ed2b Binary files /dev/null and b/kuramoto_sivashinksy/codes/ks.mod differ diff --git a/kuramoto_sivashinksy/codes/ksbenchmark.c b/kuramoto_sivashinksy/codes/ksbenchmark.c new file mode 100644 index 0000000..538d171 --- /dev/null +++ b/kuramoto_sivashinksy/codes/ksbenchmark.c @@ -0,0 +1,174 @@ +#include +#include +#include +#include +#include +#include +//#include + +typedef complex double Complex; + +void ksintegrate(Complex* u, int Nx, double Lx, double dt, int Nt); +double ksnorm(Complex* u, int Nx); + +const double pi = 3.14159265358979323846; /* why doesn't M_PI from math.j work? :-( */ + +int main(int argc, char* argv[]) { + + /* to be set as command-line args */ + int Nx = 0; /* number of x gridpoints */ + int Nruns = 5; /* number of benchmarking runs */ + int printnorms = 0; /* can't figure out C bool type :-( */ + + if (argc < 2) { + printf("please provide one integer argument Nx\n"); + exit(1); + } + Nx = atoi(argv[1]); + + if (argc >= 3) + Nruns = atoi(argv[2]); + + if (argc >= 4) + printnorms = atoi(argv[3]); + + printf("Nx == %d\n", Nx); + + double dt = 1.0/16.0; + double T = 200.0; + double Lx = (pi/16.0)*Nx; + double dx = Lx/Nx; + int Nt = (int)(T/dt); + + double* x = malloc(Nx*sizeof(double)); + Complex* u0 = malloc(Nx*sizeof(Complex)); + Complex* u = malloc(Nx*sizeof(Complex)); + + for (int n=0; n= skip) + avgtime += cputime; + + printf("cputtime == %f\n", cputime); + } + printf("norm(u(0)) == %f\n", ksnorm(u0, Nx)); + printf("norm(u(T)) == %f\n", ksnorm(u, Nx)); + + + avgtime /= (Nruns-skip); + printf("avgtime == %f\n", avgtime); + + free(u0); + free(u); + free(x); +} + +void ksintegrate(Complex* u, int Nx, double Lx, double dt, int Nt) { + int* kx = malloc(Nx*sizeof(int)); + double* alpha = malloc(Nx*sizeof(double)); + Complex* D = malloc(Nx*sizeof(Complex)); + Complex* G = malloc(Nx*sizeof(Complex)); + double* L = malloc(Nx*sizeof(double)); + + for (int n=0; n spectral */ + for (int n=0; n +#include +#include +#include +#include +#include +#include + +using namespace std; + +typedef std::complex Complex; + +void ksintegrate(Complex* u, int Nx, double Lx, double dt, int Nt); +double ksnorm(Complex* u, int Nx); + +int main(int argc, char* argv[]) { + + // to be set as command-line args + int Nx = 0; + int Nruns = 5; + bool printnorms = false; + + if (argc < 2) { + cout << "please provide one integer argument Nx\n [two optional args: Nruns printnorm]" << endl; + exit(1); + } + Nx = atoi(argv[1]); + + if (argc >= 3) + Nruns = atoi(argv[2]); + + if (argc >= 4) + printnorms = bool(atoi(argv[3])); + + cout << "Nx == " << Nx << endl; + + double dt = 1.0/16.0; + double T = 200.0; + double Lx = (M_PI/16.0)*Nx; + double dx = Lx/Nx; + int Nt = (int)(T/dt); + + double* x = new double[Nx]; + Complex* u0 = new Complex[Nx]; + Complex* u = new Complex[Nx]; + + for (int n=0; n= skip) + avgtime += cputime; + + printf("cputtime == %f\n", cputime); + } + printf("norm(u(0)) == %f\n", ksnorm(u0, Nx)); + printf("norm(u(T)) == %f\n", ksnorm(u, Nx)); + + + avgtime /= (Nruns-skip); + printf("avgtime == %f\n", avgtime); + + delete[] u0; + delete[] u; + delete[] x; +} + +void ksintegrate(Complex* u, int Nx, double Lx, double dt, int Nt) { + int* kx = new int[Nx]; + double* alpha = new double[Nx]; + Complex* D = new Complex[Nx]; + Complex* G = new Complex[Nx]; + double* L = new double[Nx]; + + for (int n=0; n(u); + fftw_complex* uu_fftw = reinterpret_cast(uu); + fftw_plan u_fftw_plan = fftw_plan_dft_1d(Nx, u_fftw, u_fftw, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_plan u_ifftw_plan = fftw_plan_dft_1d(Nx, u_fftw, u_fftw, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_plan uu_fftw_plan = fftw_plan_dft_1d(Nx, uu_fftw, uu_fftw, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_plan uu_ifftw_plan = fftw_plan_dft_1d(Nx, uu_fftw, uu_fftw, FFTW_BACKWARD, FFTW_ESTIMATE); + + fftw_execute(uu_fftw_plan); + Complex* Nn = new Complex[Nx]; + for (int n=0; n spectral + for (int n=0; n skip + avgtime = avgtime + cputime + end + end + + + if printnorms + @show ksnorm(u0) + @show ksnorm(u) + end + + avgtime = avgtime/(Nruns-skip) + @show avgtime + +end + +function ksnorm(u) + s = 0.0 + for n = 1:length(u) + s += (u[n])^2 + end + sqrt(s/length(u)) +end + +""" +ksintegrateNaive: integrate kuramoto-sivashinsky equation (Julia) + u_t = -u*u_x - u_xx - u_xxxx, domain x in [0,Lx], periodic BCs + + inputs + u = initial condition (vector of u(x) values on uniform gridpoints)) + Lx = domain length + dt = time step + Nt = number of integration timesteps + nsave = save every nsave-th time step + + outputs + u = final state, vector of u(x, Nt*dt) at uniform x gridpoints + +This a line-by-line translation of a Matlab code into Julia. It uses out-of-place +FFTs and doesn't pay any attention to the allocation of temporary vectors within +the time-stepping loop. Hence the name "naive". +""" +function ksintegrateNaive(u, Lx, dt, Nt) + Nx = length(u) # number of gridpoints + x = collect(0:(Nx-1)/Nx)*Lx + kx = vcat(0:Nx/2-1, 0, -Nx/2+1:-1) # integer wavenumbers: exp(2*pi*kx*x/L) + alpha = 2*pi*kx/Lx # real wavenumbers: exp(alpha*x) + D = 1im*alpha # D = d/dx operator in Fourier space + L = alpha.^2 - alpha.^4 # linear operator -D^2 - D^4 in Fourier space + G = -0.5*D # -1/2 D operator in Fourier space + + # Express PDE as u_t = Lu + N(u), L is linear part, N nonlinear part. + # Then Crank-Nicolson Adams-Bashforth discretization is + # + # (I - dt/2 L) u^{n+1} = (I + dt/2 L) u^n + 3dt/2 N^n - dt/2 N^{n-1} + # + # let A = (I - dt/2 L) + # B = (I + dt/2 L), then the CNAB timestep formula + # + # u^{n+1} = A^{-1} (B u^n + 3dt/2 N^n - dt/2 N^{n-1}) + + # convenience variables + dt2 = dt/2 + dt32 = 3*dt/2 + A = ones(Nx) + dt2*L + B = (ones(Nx) - dt2*L).^(-1) + + Nn = G.*fft(u.*u) # -u u_x (spectral), notation Nn = N^n = N(u(n dt)) + Nn1 = copy(Nn) # notation Nn1 = N^{n-1} = N(u((n-1) dt)) + u = fft(u) # transform u to spectral + + # timestepping loop + for n = 1:Nt + Nn1 = copy(Nn) # shift nonlinear term in time: N^{n-1} <- N^n + Nn = G.*fft(real(ifft(u)).^2) # compute Nn = -u u_x + + u = B .* (A .* u + dt32*Nn - dt2*Nn1) + end + + real(ifft(u)) +end + +""" +ksintegrate: integrate kuramoto-sivashinsky equation (Julia) + u_t = -u*u_x - u_xx - u_xxxx, domain x in [0,Lx], periodic BCs + + inputs + u = initial condition (vector of u(x) values on uniform gridpoints)) + Lx = domain length + dt = time step + Nt = number of integration timesteps + nsave = save every nsave-th time step + + outputs + + u = final state, vector of u(x, Nt*dt) at uniform x gridpoints + +This implementation has two improvements over ksintegrateNaive.jl. It uses + (1) in-place FFTs + (2) loop fusion: Julia can translate arithmetic vector expressions in dot syntax + to single for loop over the components, which should be much faster than + constructing a temporary vector for each operation in the vector expression. +""" +function ksintegrateInplace(u, Lx, dt, Nt) + u = (1+0im)*u # force u to be complex + Nx = length(u) # number of gridpoints + kx = vcat(0:Nx/2-1, 0:0, -Nx/2+1:-1)# integer wave #s, exp(2*pi*i*kx*x/L) + alpha = 2*pi*kx/Lx # real wavenumbers, exp(i*alpha*x) + D = 1im*alpha # spectral D = d/dx operator + L = alpha.^2 - alpha.^4 # spectral L = -D^2 - D^4 operator + G = -0.5*D # spectral -1/2 D operator, to eval -u u_x = 1/2 d/dx u^2 + + # convenience variables + dt2 = dt/2 + dt32 = 3*dt/2 + A_inv = (ones(Nx) - dt2*L).^(-1) + B = ones(Nx) + dt2*L + + # compute FFTW plans + FFT! = plan_fft!(u, flags=FFTW.ESTIMATE) + IFFT! = plan_ifft!(u, flags=FFTW.ESTIMATE) + + # compute nonlinear term Nu == -u u_x and Nuprev (Nu at prev timestep) + Nu = G.*fft(u.^2) # Nu == -1/2 d/dx (u^2) = -u u_x + Nuprev = copy(Nu) # use Nuprev = Nu at first time step + FFT!*u + + # timestepping loop + for n = 1:Nt + + Nuprev .= Nu # shift nonlinear term in time + Nu .= u # put u into N in prep for comp of nonlineat + + IFFT!*Nu # transform Nu to gridpt values, in place + Nu .= Nu.*Nu # collocation calculation of u^2 + FFT!*Nu # transform Nu back to spectral coeffs, in place + + Nu .= G.*Nu + + # loop fusion! Julia translates the folling line of code to a single for loop. + u .= A_inv .*(B .* u .+ dt32.*Nu .- dt2.*Nuprev) + end + + IFFT!*u + real(u) +end + +""" +ksintegrateUnrolled: integrate kuramoto-sivashinsky equation (Julia) + + + u_t = -u*u_x - u_xx - u_xxxx, domain x in [0,Lx], periodic BCs + + inputs + u = initial condition (vector of u(x) values on uniform gridpoints)) + Lx = domain length + dt = time step + Nt = number of integration timesteps + nsave = save every nsave-th time step + + outputs + u = final state, vector of u(x, Nt*dt) at uniform x gridpoints + +This implementation has one improvements over ksintegrateInplace.jl. It explicitly +unrolls all the vector equations into for loops. For some reason this works +better (runs faster) than the unrolled loops produced by the Julia compiler. +""" +function ksintegrateUnrolled(u, Lx, dt, Nt) + u = (1+0im)*u # force u to be complex + Nx = length(u) # number of gridpoints + kx = vcat(0:Nx/2-1, 0:0, -Nx/2+1:-1)# integer wavenumbers: exp(2*pi*kx*x/L) + alpha = 2*pi*kx/Lx # real wavenumbers: exp(alpha*x) + D = 1im*alpha # spectral D = d/dx operator + L = alpha.^2 - alpha.^4 # spectral L = -D^2 - D^4 operator + G = -0.5*D # spectral -1/2 D operator, to eval -u u_x = 1/2 d/dx u^2 + + # convenience variables + dt2 = dt/2 + dt32 = 3*dt/2 + A = ones(Nx) + dt2*L + B = (ones(Nx) - dt2*L).^(-1) + + # compute FFTW plans + FFT! = plan_fft!(u, flags=FFTW.ESTIMATE) + IFFT! = plan_ifft!(u, flags=FFTW.ESTIMATE) + + # compute nonlinear term Nu == -u u_x and Nuprev (Nu at prev timestep) + Nn = G.*fft(u.^2) # Nnf == -1/2 d/dx (u^2) = -u u_x, spectral + Nn1 = copy(Nn) # use Nnf1 = Nnf at first time step + FFT!*u + + # timestepping loop + for n = 1:Nt + + Nn1 .= Nn + Nn .= u + + IFFT!*Nn # in-place FFT + + @inbounds for i = 1:length(Nn) + @fastmath Nn[i] = Nn[i]*Nn[i] + end + + FFT!*Nn + + @inbounds for i = 1:length(Nn) + @fastmath Nn[i] = G[i]*Nn[i] + end + + @inbounds for i = 1:length(u) + @fastmath u[i] = B[i]* (A[i] * u[i] + dt32*Nn[i] - dt2*Nn1[i]) + end + + end + + IFFT!*u + real(u) +end + +""" + Construct initial conditions for benchmarking ksintegrate* algorithms + Useful for using Julia's benchmark utilities. +""" +function ksinitconds(Nx) + Lx = Nx/16*pi # spatial domain [0, L] periodic + dt = 1/16 # discrete time step + T = 200 # integrate from t=0 to t=T + nplot = round(Int,1/dt) # save every nploth time step + Nt = round(Int, T/dt) # total number of timesteps + + x = Lx*(0:Nx-1)/Nx + u0 = cos.(x) + 0.1*sin.(x/8) + 0.01*cos.(x/16) + + u0, Lx, dt, Nt +end diff --git a/kuramoto_sivashinksy/codes/ksbenchmark.m b/kuramoto_sivashinksy/codes/ksbenchmark.m new file mode 100644 index 0000000..8df3d18 --- /dev/null +++ b/kuramoto_sivashinksy/codes/ksbenchmark.m @@ -0,0 +1,98 @@ +function ksbenchmark(Nx, printnorms) +% ksbenchmark: run a Kuramoto-Sivashinky simulation, benchmark, and plot +% Nx = number of gridpoints +% printnorms = 1 => print norm(u0) and norm(uT), 0 => don't + + Lx = Nx/16*pi; % spatial domain [0, L] periodic + dt = 1/16; % discrete time step + T = 200; % integrate from t=0 to t=T + Nt = floor(T/dt); % total number of timesteps + + if nargin < 2 + printnorms = 0; + end + + x = (Lx/Nx)*(0:Nx-1); + u0 = cos(x) + 0.1*sin(x/8) + 0.01*cos((2*pi/Lx)*x); + + Nruns = 1; + skip = 1; + avgtime = 0; + + for r=1:Nruns; + tic(); + u = ksintegrate(u0, Lx, dt, Nt); + cputime = toc() + if r > skip + avgtime = avgtime + cputime; + end + end + + if printnorms == 1 + u0norm = ksnorm(u0) + uTnorm = ksnorm(u) + end + + avgtime = avgtime/(Nruns-skip) + +end + + +function n = ksnorm(u) +% ksnorm: compute the 2-norm of u(x) = sqrt(1/Lx int_0^Lx |u|^2 dx) + n = sqrt((u * u') /length(u)); +end + +function u = ksintegrate(u, Lx, dt, Nt) +% ksintegrate: integrate kuramoto-sivashinsky equation +% u_t = -u*u_x - u_xx - u_xxxx, domain x in [0,Lx], periodic BCs +% +% inputs +% u = initial condition (vector of u(x,0) values on uniform gridpoints)) +% Lx = domain length +% dt = time step +% Nt = number of integration timesteps +% +% outputs +% +% u = final state (vector of u(x,T) values on uniform gridpoints)) + + Nx = length(u); % number of gridpoints + kx = [0:Nx/2-1 0 -Nx/2+1:-1]; % integer wavenumbers: exp(2*pi*kx*x/L) + alpha = 2*pi*kx/Lx; % real wavenumbers: exp(alpha*x) + D = i*alpha; % D = d/dx operator in Fourier space + L = alpha.^2 - alpha.^4; % linear operator -D^2 - D^3 in Fourier space + G = -0.5*D; % -1/2 D operator in Fourier space + + % Express PDE as u_t = Lu + N(u), L is linear part, N nonlinear part. + % Then Crank-Nicolson Adams-Bashforth discretization is + % + % (I - dt/2 L) u^{n+1} = (I + dt/2 L) u^n + 3dt/2 N^n - dt/2 N^{n-1} + % + % let A = (I - dt/2 L) + % B = (I + dt/2 L), then the CNAB timestep formula is + % + % u^{n+1} = A^{-1} (B u^n + 3dt/2 N^n - dt/2 N^{n-1}) + + % some convenience variables + dt2 = dt/2; + dt32 = 3*dt/2; + A_inv = (ones(1,Nx) - dt2*L).^(-1); + B = ones(1,Nx) + dt2*L; + + Nn = G.*fft(u.*u); % compute -1/2 d/dx u^2 (spectral), notation Nn = N^n = N(u(n dt)) + Nn1 = Nn; % notation Nn1 = N^{n-1} = N(u((n-1) dt)) + u = fft(u); % transform u (spectral) + + % timestepping loop + for n = 1:Nt + + Nn1 = Nn; % shift N(u) in time: N^{n-1} <- N^n + Nn = G.*fft(real(ifft(u)).^2); % compute Nn = N(u) = -1/2 d/dx u^2 + + u = A_inv .* (B .* u + dt32*Nn - dt2*Nn1); + + end + u = real(ifft(u)); +end + diff --git a/kuramoto_sivashinksy/codes/ksbenchmark.py b/kuramoto_sivashinksy/codes/ksbenchmark.py new file mode 100644 index 0000000..667743e --- /dev/null +++ b/kuramoto_sivashinksy/codes/ksbenchmark.py @@ -0,0 +1,116 @@ +from numpy import * +from numpy.fft import rfft as fft +from numpy.fft import irfft as ifft +import timeit + +from transonic import jit, wait_for_all_extensions + + +def ksnorm(u) : + """ksnorm(u) : sqrt(1/Lx int_0^Lx u^2 dx)""" + N = size(u) + s = 0 + for i in range(0, N) : + s += u[i]*u[i] + return sqrt(s/N) + + +def ksbenchmark(Nx, printnorm=False) : + """ksbenchmark: benchmark the KS-CNAB2 algorithm for Nx gridpoints""" + + Lx = Nx/16*pi # spatial domain [0, L] periodic + dt = 1.0/16 # discrete time step + T = 200.0 # integrate from t=0 to t=T + Nt = T/dt # total number of time steps + + x = (Lx/Nx)*arange(0,Nx) + u0 = cos(x) + 0.1*sin(x/8) + 0.01*cos((2*pi/Lx)*x); + u = 0 + + Nruns = 5 + skip = 1 + avgtime = 0 + + for n in range(Nruns) : + tic = timeit.default_timer() + u = ksintegrate(u0,Lx,dt, Nt) + toc = timeit.default_timer() + print ("cputime == ", toc - tic) + avgtime += toc - tic + + avgtime /= Nruns + + if printnorm : + print("norm(u(0)) == ", ksnorm(u0)) + print("norm(u(T)) == ", ksnorm(u)) + + print("avgtime == ", avgtime) + + +@jit +def ksintegrate(u, Lx, dt, Nt): + """ksintegrate: integrate kuramoto-sivashinsky equation (Python) + u_t = -u*u_x - u_xx - u_xxxx, domain x in [0,Lx], periodic BCs + + inputs + u = initial condition (vector of u(x) values on uniform gridpoints)) + Lx = domain length + dt = time step + Nt = number of integration timesteps + nsave = save every nsave-th time step + + outputs + u = final state, vector of u(x, Nt*dt) at uniform x gridpoints + """ + + Nx = size(u); + kx = concatenate((arange(0,Nx/2), array([0]), arange(-Nx/2+1,0))) # int wavenumbers: exp(2*pi*kx*x/L) + alpha = 2*pi*kx/Lx; # real wavenumbers: exp(alpha*x) + D = 1j*alpha; # D = d/dx operator in Fourier space + L = pow(alpha,2) - pow(alpha,4); # linear operator -D^2 - D^3 in Fourier space + G = -0.5*D; # -1/2 D operator in Fourier space + + # Express PDE as u_t = Lu + N(u), L is linear part, N nonlinear part. + # Then Crank-Nicolson Adams-Bashforth discretization is + # + # (I - dt/2 L) u^{n+1} = (I + dt/2 L) u^n + 3dt/2 N^n - dt/2 N^{n-1} + # + # let A = (I - dt/2 L) + # B = (I + dt/2 L), then the CNAB timestep formula + # + # u^{n+1} = A^{-1} (B u^n + 3dt/2 N^n - dt/2 N^{n-1}) + + # some convenience variables + dt2 = dt/2; + dt32 = 3*dt/2; + A = ones(Nx) + dt2*L; + B = 1.0/(ones(Nx) - dt2*L) + + Nn = G*fft(u*u); # compute -u u_x (spectral), notation Nn = N^n = N(u(n dt)) + Nn1 = Nn; # notation Nn1 = N^{n-1} = N(u((n-1) dt)) + u = fft(u); # transform u (spectral) + + # timestepping loop + for n in range(0,int(Nt)) : + + Nn1 = Nn; # shift nonlinear term in time: N^{n-1} <- N^n + uu = real(ifft(u)) + uu = uu*uu + uu = fft(uu) + Nn = G*uu # compute Nn == -u u_x (spectral) + + u = B * (A * u + dt32*Nn - dt2*Nn1); + + return real(ifft(u)); + + +if __name__ == "__main__": + import sys + # execute only if run as a script + print(sys.argv[1]) + #print sys.argv[2] + + ksbenchmark(int(sys.argv[1])) + wait_for_all_extensions() + ksbenchmark(int(sys.argv[1]), printnorm=True) + diff --git a/kuramoto_sivashinksy/codes/ksintegrate.cpp b/kuramoto_sivashinksy/codes/ksintegrate.cpp new file mode 100644 index 0000000..5f3766c --- /dev/null +++ b/kuramoto_sivashinksy/codes/ksintegrate.cpp @@ -0,0 +1,133 @@ +typedef std::complex Complex; + +// ksintegrate: integrate kuramoto-sivashinsky equation (Python) +// u_t = -u*u_x - u_xx - u_xxxx, domain x in [0,Lx], periodic BCs + +// inputs +// u = initial condition (vector of u(x) values on uniform gridpoints)) +// Lx = domain length +// dt = time step +// Nt = number of integration timesteps +// nsave = save every nsave-th time step +// +// outputs +// u = final state, vector of u(x, Nt*dt) at uniform x gridpoints + +void ksintegrate(Complex* u, const int Nx, const double Lx, const double dt, const int Nt) { + + int* kx = new int[Nx]; // for Nx=8, kx = [0 1 2 3 0 -3 -2 -1] + double* alpha = new double[Nx]; // real wavenumbers: exp(alpha*x) + Complex* D = new Complex[Nx]; // D = d/dx operator in Fourier space + Complex* G = new Complex[Nx]; // -1/2 D operator in Fourier space + double* L = new double[Nx]; // -D^2 - D^4 + + // Assign Fourier wave numbers in order that FFTW produces + for (int n=0; n(u); + fftw_complex* uu_fftw = reinterpret_cast(uu); + + // Construct FFTW plans + fftw_plan u_fftw_plan = fftw_plan_dft_1d(Nx, u_fftw, u_fftw, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_plan u_ifftw_plan = fftw_plan_dft_1d(Nx, u_fftw, u_fftw, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_plan uu_fftw_plan = fftw_plan_dft_1d(Nx, uu_fftw, uu_fftw, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_plan uu_ifftw_plan = fftw_plan_dft_1d(Nx, uu_fftw, uu_fftw, FFTW_BACKWARD, FFTW_ESTIMATE); + + // Compute Nn = G*fft(u*u), (-u u_x, spectral) + fftw_execute(uu_fftw_plan); // uu physical -> spectral + + Complex* Nn = new Complex[Nx]; + for (int n=0; n spectral + + // timestepping loop + for (int s=0; s physical + for (int n=0; n spectral + + for (int n=0; n spectral + for (int n=0; n spectral + + # timestepping loop + for n = 1:Nt + + Nn1 .= Nn # shift nonlinear term in time + + Nn .= u # put u into Nn + IFFT!*Nn; # transform Nn = u to gridpt values, in place + Nn .= Nn.*Nn; # collocation calculation, set Nn = u^2 + FFT!*Nn; # transform Nn = u^2 back to spectral coeffs + Nn .= G.*Nn; # apply G = -1/2d/dx to compute Nn = -1/2 d/dx (u^2) + + # loop fusion! Julia translates this line into a single for-loop on + # u[i] = A_inv[i] * (B[i]*u[i] + dt32*Nn[i] - dt2*Nn1[i]; + # no temporary vectors! + + u .= A_inv .* (B .* u .+ dt32.*Nn .- dt2.*Nn1); + + if mod(n, nsave) == 0 + U[s,:] = real(ifft(u)) + s += 1 + end + end + + t,U +end diff --git a/kuramoto_sivashinksy/codes/ksintegrate.m b/kuramoto_sivashinksy/codes/ksintegrate.m new file mode 100644 index 0000000..7d8a731 --- /dev/null +++ b/kuramoto_sivashinksy/codes/ksintegrate.m @@ -0,0 +1,40 @@ +function t,U = ksintegrate(u, Lx, dt, Nt, nsave) + Nx = length(u); % number of gridpoints + kx = [0:Nx/2-1 0 -Nx/2+1:-1]; % integer wavenumbers: exp(2*pi*kx*x/L) + alpha = 2*pi*kx/Lx; % real wavenumbers: exp(alpha*x) + D = i*alpha; % D = d/dx operator in Fourier space + L = alpha.^2 - alpha.^4; % linear operator -D^2 - D^3 in Fourier space + G = -0.5*D; % -1/2 D operator in Fourier space + + Nsave = round(Nt/nsave) + 1 % number of saved time steps + t = (0:Nsave)*(dt*nsave) % t timesteps + U = zeros(Nsave, Nx) % matrix of u(xⱼ, tᵢ) values + U(1,:) = u % assign initial condition to U + s = 2 % counter for saved data + + % some convenience variables + dt2 = dt/2; + dt32 = 3*dt/2; + A_inv = (ones(1,Nx) - dt2*L).^(-1); + B = ones(1,Nx) + dt2*L; + + % compute nonlinear term Nn = -u u_x + Nn = G.*fft(u.*u); % Nn = -1/2 d/dx u^2 = -u u_x + Nn1 = Nn; % Nn1 = Nn at first time step + u = fft(u); % transform u physical -> spectral + + % timestepping loop + for n = 1:Nt + + Nn1 = Nn; % shift nonlinear term in time + Nn = G.*fft(real(ifft(u)).^2); % compute Nn = N(u) = -u u_x + + % time-stepping eqn: Matlab generates six temp vectors to evaluate + u = A_inv .* (B .* u + dt32*Nn - dt2*Nn1); + + if mod(n, nsave) == 0 + U(s,:) = real(ifft(u)); + s = s+1; + end + +end diff --git a/kuramoto_sivashinksy/codes/ksintegrate.py b/kuramoto_sivashinksy/codes/ksintegrate.py new file mode 100644 index 0000000..f5078fe --- /dev/null +++ b/kuramoto_sivashinksy/codes/ksintegrate.py @@ -0,0 +1,51 @@ +def ksintegrate(u, Lx, dt, Nt) : + """ksintegrate: integrate kuramoto-sivashinsky equation (Python) + u_t = -u*u_x - u_xx - u_xxxx, domain x in [0,Lx], periodic BCs + + inputs + u = initial condition (vector of u(x) values on uniform gridpoints)) + Lx = domain length + dt = time step + Nt = number of integration timesteps + nsave = save every nsave-th time step + + outputs + u = final state, vector of u(x, Nt*dt) at uniform x gridpoints + """ + + Nx = size(u); + kx = concatenate((arange(0,Nx/2), array([0]), arange(-Nx/2+1,0))) # int wavenumbers: exp(2*pi*kx*x/L) + alpha = 2*pi*kx/Lx; # real wavenumbers: exp(alpha*x) + D = 1j*alpha; # D = d/dx operator in Fourier space + L = pow(alpha,2) - pow(alpha,4); # linear operator -D^2 - D^3 in Fourier space + G = -0.5*D; # -1/2 D operator in Fourier space + + # Express PDE as u_t = Lu + N(u), L is linear part, N nonlinear part. + # Then Crank-Nicolson Adams-Bashforth discretization is + # + # (I - dt/2 L) u^{n+1} = (I + dt/2 L) u^n + 3dt/2 N^n - dt/2 N^{n-1} + # + # let A = (I - dt/2 L) + # B = (I + dt/2 L), then the CNAB timestep formula + # + # u^{n+1} = A^{-1} (B u^n + 3dt/2 N^n - dt/2 N^{n-1}) + + # some convenience variables + dt2 = dt/2; + dt32 = 3*dt/2; + A = ones(Nx) + dt2*L; + B = 1.0/(ones(Nx) - dt2*L) + + Nn = G*fft(u*u); # compute -u u_x (spectral), notation Nn = N^n = N(u(n dt)) + Nn1 = Nn; # notation Nn1 = N^{n-1} = N(u((n-1) dt)) + u = fft(u); # transform u (spectral) + + # timestepping loop + for n in range(0,int(Nt)) : + + Nn1 = Nn; # shift nonlinear term in time: N^{n-1} <- N^n + Nn = G*fft(real(ifft(u*u))); # compute Nn == -u u_x (spectral) + + u = B * (A * u + dt32*Nn - dt2*Nn1); + + return real(ifft(u)); diff --git a/kuramoto_sivashinksy/codes/ksintegrateInplace.jl b/kuramoto_sivashinksy/codes/ksintegrateInplace.jl new file mode 100644 index 0000000..268650c --- /dev/null +++ b/kuramoto_sivashinksy/codes/ksintegrateInplace.jl @@ -0,0 +1,64 @@ +""" +ksintegrate: integrate kuramoto-sivashinsky equation (Julia) + u_t = -u*u_x - u_xx - u_xxxx, domain x in [0,Lx], periodic BCs + + inputs + u = initial condition (vector of u(x) values on uniform gridpoints)) + Lx = domain length + dt = time step + Nt = number of integration timesteps + nsave = save every nsave-th time step + + outputs + + u = final state, vector of u(x, Nt*dt) at uniform x gridpoints + +This implementation has two improvements over ksintegrateNaive.jl. It uses + (1) in-place FFTs + (2) loop fusion: Julia can translate arithmetic vector expressions in dot syntax + to single for loop over the components, which should be much faster than + constructing a temporary vector for each operation in the vector expression. +""" +function ksintegrateInplace(u, Lx, dt, Nt) + u = (1+0im)*u # force u to be complex + Nx = length(u) # number of gridpoints + kx = vcat(0:Nx/2-1, 0:0, -Nx/2+1:-1)# integer wave #s, exp(2*pi*i*kx*x/L) + alpha = 2*pi*kx/Lx # real wavenumbers, exp(i*alpha*x) + D = 1im*alpha # spectral D = d/dx operator + L = alpha.^2 - alpha.^4 # spectral L = -D^2 - D^4 operator + G = -0.5*D # spectral -1/2 D operator, to eval -u u_x = 1/2 d/dx u^2 + + # convenience variables + dt2 = dt/2 + dt32 = 3*dt/2 + A_inv = (ones(Nx) - dt2*L).^(-1) + B = ones(Nx) + dt2*L + + # compute FFTW plans + FFT! = plan_fft!(u, flags=FFTW.ESTIMATE) + IFFT! = plan_ifft!(u, flags=FFTW.ESTIMATE) + + # compute nonlinear term Nn == -u u_x and Nuprev (Nu at prev timestep) + Nn = G.*fft(u.^2) # Nn == -1/2 d/dx (u^2) = -u u_x + Nn1 = copy(Nn) # use Nn1 = Nn at first time step + FFT!*u + + # timestepping loop + for n = 0:Nt + + Nn1 .= Nn # shift nonlinear term in time + Nn .= u # put u into N in prep for comp of nonlineat + + IFFT!*Nn # transform Nn to gridpt values, in place + Nn .= Nn.*Nn # collocation calculation of u^2 + FFT!*Nn # transform Nn back to spectral coeffs, in place + + Nn .= G.*Nn + + # loop fusion! Julia translates the folling line of code to a single for loop. + u .= A_inv .*(B .* u .+ dt32.*Nn .- dt2.*Nn1) + end + + IFFT!*u + real(u) +end diff --git a/kuramoto_sivashinksy/codes/ksintegrateNaive.jl b/kuramoto_sivashinksy/codes/ksintegrateNaive.jl new file mode 100644 index 0000000..5665b52 --- /dev/null +++ b/kuramoto_sivashinksy/codes/ksintegrateNaive.jl @@ -0,0 +1,58 @@ +""" +ksintegrateNaive: integrate kuramoto-sivashinsky equation (Julia) + u_t = -u*u_x - u_xx - u_xxxx, domain x in [0,Lx], periodic BCs + + inputs + u = initial condition (vector of u(x) values on uniform gridpoints)) + Lx = domain length + dt = time step + Nt = number of integration timesteps + nsave = save every nsave-th time step + + outputs + u = final state, vector of u(x, Nt*dt) at uniform x gridpoints + +This a line-by-line translation of a Matlab code into Julia. It uses out-of-place +FFTs and doesn't pay any attention to the allocation of temporary vectors within +the time-stepping loop. Hence the name "naive". +""" +function ksintegrateNaive(u, Lx, dt, Nt) + Nx = length(u) # number of gridpoints + x = collect(0:(Nx-1)/Nx)*Lx + kx = vcat(0:Nx/2-1, 0, -Nx/2+1:-1) # integer wavenumbers: exp(2*pi*kx*x/L) + alpha = 2*pi*kx/Lx # real wavenumbers: exp(alpha*x) + D = 1im*alpha # D = d/dx operator in Fourier space + L = alpha.^2 - alpha.^4 # linear operator -D^2 - D^4 in Fourier space + G = -0.5*D # -1/2 D operator in Fourier space + + # Express PDE as u_t = Lu + N(u), L is linear part, N nonlinear part. + # Then Crank-Nicolson Adams-Bashforth discretization is + # + # (I - dt/2 L) u^{n+1} = (I + dt/2 L) u^n + 3dt/2 N^n - dt/2 N^{n-1} + # + # let A = (I - dt/2 L) + # B = (I + dt/2 L), then the CNAB timestep formula + # + # u^{n+1} = A^{-1} (B u^n + 3dt/2 N^n - dt/2 N^{n-1}) + + # convenience variables + dt2 = dt/2 + dt32 = 3*dt/2 + A = ones(Nx) + dt2*L + B = (ones(Nx) - dt2*L).^(-1) + + Nn = G.*fft(u.*u) # -u u_x (spectral), notation Nn = N^n = N(u(n dt)) + Nn1 = copy(Nn) # notation Nn1 = N^{n-1} = N(u((n-1) dt)) + u = fft(u) # transform u to spectral + + # timestepping loop + for n = 0:Nt + Nn1 = copy(Nn) # shift nonlinear term in time: N^{n-1} <- N^n + Nn = G.*fft(real(ifft(u)).^2) # compute Nn = -u u_x + + u = B .* (A .* u + dt32*Nn - dt2*Nn1) + end + + real(ifft(u)) +end + diff --git a/kuramoto_sivashinksy/codes/ksintegrateUnrolled.jl b/kuramoto_sivashinksy/codes/ksintegrateUnrolled.jl new file mode 100644 index 0000000..559d172 --- /dev/null +++ b/kuramoto_sivashinksy/codes/ksintegrateUnrolled.jl @@ -0,0 +1,71 @@ +""" +ksintegrateUnrolled: integrate kuramoto-sivashinsky equation (Julia) + + + u_t = -u*u_x - u_xx - u_xxxx, domain x in [0,Lx], periodic BCs + + inputs + u = initial condition (vector of u(x) values on uniform gridpoints)) + Lx = domain length + dt = time step + Nt = number of integration timesteps + nsave = save every nsave-th time step + + outputs + u = final state, vector of u(x, Nt*dt) at uniform x gridpoints + +This implementation has one improvements over ksintegrateInplace.jl. It explicitly +unrolls all the vector equations into for loops. For some reason this works +better (runs faster) than the unrolled loops produced by the Julia compiler. +""" +function ksintegrateUnrolled(u, Lx, dt, Nt) + u = (1+0im)*u # force u to be complex + Nx = length(u) # number of gridpoints + kx = vcat(0:Nx/2-1, 0:0, -Nx/2+1:-1)# integer wavenumbers: exp(2*pi*kx*x/L) + alpha = 2*pi*kx/Lx # real wavenumbers: exp(alpha*x) + D = 1im*alpha # spectral D = d/dx operator + L = alpha.^2 - alpha.^4 # spectral L = -D^2 - D^4 operator + G = -0.5*D # spectral -1/2 D operator, to eval -u u_x = 1/2 d/dx u^2 + + # convenience variables + dt2 = dt/2 + dt32 = 3*dt/2 + A = ones(Nx) + dt2*L + B = (ones(Nx) - dt2*L).^(-1) + + # compute FFTW plans + FFT! = plan_fft!(u, flags=FFTW.ESTIMATE) + IFFT! = plan_ifft!(u, flags=FFTW.ESTIMATE) + + # compute nonlinear term Nn == N(u^n) = -u u_x and Nn1 + Nn = G.*fft(u.^2) # Nn == -1/2 d/dx (u^2) = -u u_x, spectral + Nn1 = copy(Nn) # Nn1 == N(u^{n-1}). For first timestep, let Nn1 = Nn + FFT!*u + + # timestepping loop + for n = 0:Nt + + Nn1 .= Nn + Nn .= u + + IFFT!*Nn # in-place FFT + + @inbounds for i = 1:length(Nn) + @fastmath Nn[i] = Nn[i]*Nn[i] + end + + FFT!*Nn + + @inbounds for i = 1:length(Nn) + @fastmath Nn[i] = G[i]*Nn[i] + end + + @inbounds for i = 1:length(u) + @fastmath u[i] = B[i]* (A[i] * u[i] + dt32*Nn[i] - dt2*Nn1[i]) + end + + end + + IFFT!*u + real(u) +end diff --git a/kuramoto_sivashinksy/stripped/ksstripped.cpp b/kuramoto_sivashinksy/stripped/ksstripped.cpp new file mode 100644 index 0000000..8eb0bbd --- /dev/null +++ b/kuramoto_sivashinksy/stripped/ksstripped.cpp @@ -0,0 +1,76 @@ +void ksintegrate(Complex* u, const int Nx, const double Lx, const double dt, const int Nt) { + int* kx = new int[Nx]; + double* alpha = new double[Nx]; + Complex* D = new Complex[Nx]; + Complex* G = new Complex[Nx]; + double* L = new double[Nx]; + for (int n=0; n(u); + fftw_complex* uu_fftw = reinterpret_cast(uu); + fftw_plan u_fftw_plan = fftw_plan_dft_1d(Nx, u_fftw, u_fftw, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_plan u_ifftw_plan = fftw_plan_dft_1d(Nx, u_fftw, u_fftw, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_plan uu_fftw_plan = fftw_plan_dft_1d(Nx, uu_fftw, uu_fftw, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_plan uu_ifftw_plan = fftw_plan_dft_1d(Nx, uu_fftw, uu_fftw, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_execute(uu_fftw_plan); + Complex* Nn = new Complex[Nx]; + for (int n=0; n