diff --git a/docs/tutorials/astrometric.ipynb b/docs/tutorials/astrometric.ipynb
index d871c170..18465ecb 100644
--- a/docs/tutorials/astrometric.ipynb
+++ b/docs/tutorials/astrometric.ipynb
@@ -93,7 +93,7 @@
     "astro_data[\"rho_err\"][astro_data[\"rho_err\"].mask == True] = 0.01\n",
     "astro_data[\"PA_err\"][astro_data[\"PA_err\"].mask == True] = 1.0\n",
     "\n",
-    "# Convert all masked frames to be raw np arrays, since theano has issues with astropy masked columns\n",
+    "# Convert all masked frames to be raw np arrays, since pytensor has issues with astropy masked columns\n",
     "rho_data = np.ascontiguousarray(astro_data[\"rho\"], dtype=float)  # arcsec\n",
     "rho_err = np.ascontiguousarray(astro_data[\"rho_err\"], dtype=float)\n",
     "\n",
@@ -192,11 +192,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import pymc3 as pm\n",
-    "import pymc3_ext as pmx\n",
+    "import pymc as pm\n",
+    "import pymc_ext as pmx\n",
     "\n",
-    "from aesara_theano_fallback import aesara as theano\n",
-    "import aesara_theano_fallback.tensor as tt\n",
+    "import pytensor\n",
+    "import pytensor.tensor as pt\n",
     "\n",
     "import exoplanet as xo\n",
     "\n",
@@ -230,7 +230,7 @@
     "# The position functions take an optional argument parallax to convert from\n",
     "# physical units back to arcseconds\n",
     "t = np.linspace(T0 - P, T0 + P, num=200)  # days\n",
-    "rho, theta = theano.function([], orbit.get_relative_angles(t, parallax))()\n",
+    "rho, theta = pytensor.function([], orbit.get_relative_angles(t, parallax))()\n",
     "\n",
     "# Plot the orbit\n",
     "fig, ax = plt.subplots(nrows=1, figsize=(4, 4))\n",
@@ -268,7 +268,7 @@
    "id": "ecd7ec33",
    "metadata": {},
    "source": [
-    "Now that we have an initial orbit, we can set the model up using PyMC3 to do inference."
+    "Now that we have an initial orbit, we can set the model up using PyMC to do inference."
    ]
   },
   {
@@ -293,43 +293,47 @@
     "        else:\n",
     "            # Below we will run a version of this model where a measurement of parallax is provided\n",
     "            # The measurement is in milliarcsec\n",
-    "            m_plx = pm.Bound(pm.Normal, lower=0, upper=100)(\n",
-    "                \"m_plx\", mu=parallax[0], sd=parallax[1], testval=parallax[0]\n",
+    "            m_plx = pm.Truncated(\n",
+    "                \"m_plx\",\n",
+    "                pm.Normal.dist(mu=parallax[0], sigma=parallax[1]),\n",
+    "                lower=0,\n",
+    "                upper=100,\n",
+    "                initval=parallax[0],\n",
     "            )\n",
     "            plx = pm.Deterministic(\"plx\", 1e-3 * m_plx)\n",
     "\n",
-    "        a_ang = pm.Uniform(\"a_ang\", 0.1, 1.0, testval=0.324)\n",
+    "        a_ang = pm.Uniform(\"a_ang\", 0.1, 1.0, initval=0.324)\n",
     "        a = pm.Deterministic(\"a\", a_ang / plx)\n",
     "\n",
     "        # We expect the period to be somewhere in the range of 25 years,\n",
     "        # so we'll set a broad prior on logP\n",
     "        logP = pm.Normal(\n",
-    "            \"logP\", mu=np.log(25 * yr), sd=10.0, testval=np.log(28.8 * yr)\n",
+    "            \"logP\", mu=np.log(25 * yr), sigma=10.0, initval=np.log(28.8 * yr)\n",
     "        )\n",
-    "        P = pm.Deterministic(\"P\", tt.exp(logP))\n",
+    "        P = pm.Deterministic(\"P\", pt.exp(logP))\n",
     "\n",
     "        # For astrometric-only fits, it's generally better to fit in\n",
     "        # p = (Omega + omega)/2 and m = (Omega - omega)/2 instead of omega and Omega\n",
     "        # directly\n",
     "        omega0 = 251.6 * deg - np.pi\n",
     "        Omega0 = 159.6 * deg\n",
-    "        p = pmx.Angle(\"p\", testval=0.5 * (Omega0 + omega0))\n",
-    "        m = pmx.Angle(\"m\", testval=0.5 * (Omega0 - omega0))\n",
+    "        p = pmx.angle(\"p\", initval=0.5 * (Omega0 + omega0))\n",
+    "        m = pmx.angle(\"m\", initval=0.5 * (Omega0 - omega0))\n",
     "        omega = pm.Deterministic(\"omega\", p - m)\n",
     "        Omega = pm.Deterministic(\"Omega\", p + m)\n",
     "\n",
     "        # For these orbits, it can also be better to fit for a phase angle\n",
     "        # (relative to a reference time) instead of the time of periastron\n",
     "        # passage directly\n",
-    "        phase = pmx.Angle(\"phase\", testval=0.0)\n",
+    "        phase = pmx.angle(\"phase\", initval=0.0)\n",
     "        tperi = pm.Deterministic(\"tperi\", T0 + P * phase / (2 * np.pi))\n",
     "\n",
     "        # Geometric uniform prior on cos(incl)\n",
     "        cos_incl = pm.Uniform(\n",
-    "            \"cos_incl\", lower=-1, upper=1, testval=np.cos(96.0 * deg)\n",
+    "            \"cos_incl\", lower=-1, upper=1, initval=np.cos(96.0 * deg)\n",
     "        )\n",
-    "        incl = pm.Deterministic(\"incl\", tt.arccos(cos_incl))\n",
-    "        ecc = pm.Uniform(\"ecc\", lower=0.0, upper=1.0, testval=0.798)\n",
+    "        incl = pm.Deterministic(\"incl\", pt.arccos(cos_incl))\n",
+    "        ecc = pm.Uniform(\"ecc\", lower=0.0, upper=1.0, initval=0.798)\n",
     "\n",
     "        # Set up the orbit\n",
     "        orbit = xo.orbits.KeplerianOrbit(\n",
@@ -351,24 +355,31 @@
     "\n",
     "        # Add jitter terms to both separation and position angle\n",
     "        log_rho_s = pm.Normal(\n",
-    "            \"log_rho_s\", mu=np.log(np.median(rho_err)), sd=2.0\n",
+    "            \"log_rho_s\", mu=np.log(np.median(rho_err)), sigma=2.0\n",
     "        )\n",
     "        log_theta_s = pm.Normal(\n",
-    "            \"log_theta_s\", mu=np.log(np.median(theta_err)), sd=2.0\n",
+    "            \"log_theta_s\", mu=np.log(np.median(theta_err)), sigma=2.0\n",
     "        )\n",
-    "        rho_tot_err = tt.sqrt(rho_err**2 + tt.exp(2 * log_rho_s))\n",
-    "        theta_tot_err = tt.sqrt(theta_err**2 + tt.exp(2 * log_theta_s))\n",
+    "        rho_tot_err = pt.sqrt(rho_err**2 + pt.exp(2 * log_rho_s))\n",
+    "        theta_tot_err = pt.sqrt(theta_err**2 + pt.exp(2 * log_theta_s))\n",
     "\n",
     "        # define the likelihood function, e.g., a Gaussian on both rho and theta\n",
-    "        pm.Normal(\"rho_obs\", mu=rho_model, sd=rho_tot_err, observed=rho_data)\n",
+    "        pm.Normal(\n",
+    "            \"rho_obs\", mu=rho_model, sigma=rho_tot_err, observed=rho_data\n",
+    "        )\n",
     "\n",
     "        # We want to be cognizant of the fact that theta wraps so the following is equivalent to\n",
     "        # pm.Normal(\"obs_theta\", mu=theta_model, observed=theta_data, sd=theta_tot_err)\n",
     "        # but takes into account the wrapping. Thanks to Rob de Rosa for the tip.\n",
-    "        theta_diff = tt.arctan2(\n",
-    "            tt.sin(theta_model - theta_data), tt.cos(theta_model - theta_data)\n",
+    "        theta_diff = pt.arctan2(\n",
+    "            pt.sin(theta_model - theta_data), pt.cos(theta_model - theta_data)\n",
+    "        )\n",
+    "        pm.Normal(\n",
+    "            \"theta_obs\",\n",
+    "            mu=theta_diff,\n",
+    "            sigma=theta_tot_err,\n",
+    "            observed=np.zeros_like(theta_data),\n",
     "        )\n",
-    "        pm.Normal(\"theta_obs\", mu=theta_diff, sd=theta_tot_err, observed=0.0)\n",
     "\n",
     "        # Set up predicted orbits for later plotting\n",
     "        rho_dense, theta_dense = orbit.get_relative_angles(t_fine, plx)\n",
@@ -376,7 +387,7 @@
     "        theta_save = pm.Deterministic(\"theta_save\", theta_dense)\n",
     "\n",
     "        # Optimize to find the initial parameters\n",
-    "        map_soln = model.test_point\n",
+    "        map_soln = model.initial_point()\n",
     "        map_soln = pmx.optimize(map_soln, vars=[log_rho_s, log_theta_s])\n",
     "        map_soln = pmx.optimize(map_soln, vars=[phase])\n",
     "        map_soln = pmx.optimize(map_soln, vars=[p, m, ecc])\n",
@@ -453,10 +464,10 @@
    "source": [
     "np.random.seed(1234)\n",
     "with model:\n",
-    "    trace = pmx.sample(\n",
+    "    trace = pmx.utils.sample(\n",
     "        tune=1000,\n",
     "        draws=1000,\n",
-    "        start=map_soln,\n",
+    "        initvals=map_soln,\n",
     "        cores=2,\n",
     "        chains=2,\n",
     "        target_accept=0.9,\n",
@@ -596,10 +607,10 @@
    "source": [
     "np.random.seed(5432)\n",
     "with plx_model:\n",
-    "    plx_trace = pmx.sample(\n",
+    "    plx_trace = pmx.utils.sample(\n",
     "        tune=1000,\n",
     "        draws=1000,\n",
-    "        start=plx_map_soln,\n",
+    "        initvals=plx_map_soln,\n",
     "        cores=2,\n",
     "        chains=2,\n",
     "        target_accept=0.9,\n",
diff --git a/docs/tutorials/eb.ipynb b/docs/tutorials/eb.ipynb
index 5fbdd7e1..830b400a 100644
--- a/docs/tutorials/eb.ipynb
+++ b/docs/tutorials/eb.ipynb
@@ -168,7 +168,7 @@
    "source": [
     "## Probabilistic model\n",
     "\n",
-    "Then we define the probabilistic model using PyMC3 and exoplanet.\n",
+    "Then we define the probabilistic model using PyMC and exoplanet.\n",
     "This is similar to the other tutorials and case studies, but here we're using a [SecondaryEclipseLightCurve](https://docs.exoplanet.codes/en/stable/user/api/#exoplanet.SecondaryEclipseLightCurve) to generate the model light curve and we're modeling the radial velocity trends using a Gaussian process instead of a polynomial.\n",
     "Otherwise, things should look pretty familiar!\n",
     "\n",
@@ -182,27 +182,27 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import pymc3 as pm\n",
-    "import aesara_theano_fallback.tensor as tt\n",
+    "import pymc as pm\n",
+    "import pytensor.tensor as pt\n",
     "\n",
     "import exoplanet as xo\n",
-    "import pymc3_ext as pmx\n",
-    "from celerite2.theano import terms, GaussianProcess\n",
+    "import pymc_ext as pmx\n",
+    "from celerite2.pymc import terms, GaussianProcess\n",
     "\n",
     "\n",
     "def build_model(mask):\n",
     "    with pm.Model() as model:\n",
     "        # Systemic parameters\n",
-    "        mean_lc = pm.Normal(\"mean_lc\", mu=0.0, sd=5.0)\n",
-    "        mean_rv = pm.Normal(\"mean_rv\", mu=0.0, sd=50.0)\n",
-    "        u1 = xo.QuadLimbDark(\"u1\")\n",
-    "        u2 = xo.QuadLimbDark(\"u2\")\n",
+    "        mean_lc = pm.Normal(\"mean_lc\", mu=0.0, sigma=5.0)\n",
+    "        mean_rv = pm.Normal(\"mean_rv\", mu=0.0, sigma=50.0)\n",
+    "        u1 = xo.quad_limb_dark(\"u1\")\n",
+    "        u2 = xo.quad_limb_dark(\"u2\")\n",
     "\n",
     "        # Parameters describing the primary\n",
     "        log_M1 = pm.Normal(\"log_M1\", mu=0.0, sigma=10.0)\n",
     "        log_R1 = pm.Normal(\"log_R1\", mu=0.0, sigma=10.0)\n",
-    "        M1 = pm.Deterministic(\"M1\", tt.exp(log_M1))\n",
-    "        R1 = pm.Deterministic(\"R1\", tt.exp(log_R1))\n",
+    "        M1 = pm.Deterministic(\"M1\", pt.exp(log_M1))\n",
+    "        R1 = pm.Deterministic(\"R1\", pt.exp(log_R1))\n",
     "\n",
     "        # Secondary ratios\n",
     "        log_k = pm.Normal(\"log_k\", mu=0.0, sigma=10.0)  # radius ratio\n",
@@ -210,32 +210,35 @@
     "        log_s = pm.Normal(\n",
     "            \"log_s\", mu=np.log(0.5), sigma=10.0\n",
     "        )  # surface brightness ratio\n",
-    "        pm.Deterministic(\"k\", tt.exp(log_k))\n",
-    "        pm.Deterministic(\"q\", tt.exp(log_q))\n",
-    "        pm.Deterministic(\"s\", tt.exp(log_s))\n",
+    "        pm.Deterministic(\"k\", pt.exp(log_k))\n",
+    "        pm.Deterministic(\"q\", pt.exp(log_q))\n",
+    "        pm.Deterministic(\"s\", pt.exp(log_s))\n",
     "\n",
     "        # Prior on flux ratio\n",
-    "        pm.Normal(\n",
+    "        pm.Potential(\n",
     "            \"flux_prior\",\n",
-    "            mu=lit_flux_ratio[0],\n",
-    "            sigma=lit_flux_ratio[1],\n",
-    "            observed=tt.exp(2 * log_k + log_s),\n",
+    "            pm.logp(\n",
+    "                pm.Normal.dist(mu=lit_flux_ratio[0], sigma=lit_flux_ratio[1]),\n",
+    "                pt.exp(2 * log_k + log_s),\n",
+    "            ),\n",
     "        )\n",
     "\n",
     "        # Parameters describing the orbit\n",
-    "        b = xo.ImpactParameter(\"b\", ror=tt.exp(log_k), testval=1.5)\n",
+    "        b = xo.impact_parameter(\"b\", ror=pt.exp(log_k), initval=1.5)\n",
     "        log_period = pm.Normal(\"log_period\", mu=np.log(lit_period), sigma=1.0)\n",
-    "        period = pm.Deterministic(\"period\", tt.exp(log_period))\n",
+    "        period = pm.Deterministic(\"period\", pt.exp(log_period))\n",
     "        t0 = pm.Normal(\"t0\", mu=lit_t0, sigma=1.0)\n",
     "\n",
     "        # Parameters describing the eccentricity: ecs = [e * cos(w), e * sin(w)]\n",
-    "        ecs = pmx.UnitDisk(\"ecs\", testval=np.array([1e-5, 0.0]))\n",
-    "        ecc = pm.Deterministic(\"ecc\", tt.sqrt(tt.sum(ecs**2)))\n",
-    "        omega = pm.Deterministic(\"omega\", tt.arctan2(ecs[1], ecs[0]))\n",
+    "        ecosw, esinw = pmx.unit_disk(\n",
+    "            \"ecosw\", \"esinw\", initval=np.array([1e-5, 0.0])\n",
+    "        )\n",
+    "        ecc = pm.Deterministic(\"ecc\", pt.sqrt(ecosw**2 + esinw**2))\n",
+    "        omega = pm.Deterministic(\"omega\", pt.arctan2(esinw, ecosw))\n",
     "\n",
     "        # Build the orbit\n",
-    "        R2 = pm.Deterministic(\"R2\", tt.exp(log_k + log_R1))\n",
-    "        M2 = pm.Deterministic(\"M2\", tt.exp(log_q + log_M1))\n",
+    "        R2 = pm.Deterministic(\"R2\", pt.exp(log_k + log_R1))\n",
+    "        M2 = pm.Deterministic(\"M2\", pt.exp(log_q + log_M1))\n",
     "        orbit = xo.orbits.KeplerianOrbit(\n",
     "            period=period,\n",
     "            t0=t0,\n",
@@ -254,46 +257,46 @@
     "        # Noise model for the light curve\n",
     "        sigma_lc = pm.InverseGamma(\n",
     "            \"sigma_lc\",\n",
-    "            testval=1.0,\n",
-    "            **pmx.estimate_inverse_gamma_parameters(0.1, 2.0),\n",
+    "            initval=1.0,\n",
+    "            **pmx.utils.estimate_inverse_gamma_parameters(0.1, 2.0),\n",
     "        )\n",
     "        sigma_gp = pm.InverseGamma(\n",
     "            \"sigma_gp\",\n",
-    "            testval=0.5,\n",
-    "            **pmx.estimate_inverse_gamma_parameters(1.0, 5.0),\n",
+    "            initval=0.5,\n",
+    "            **pmx.utils.estimate_inverse_gamma_parameters(1.0, 5.0),\n",
     "        )\n",
     "        rho_gp = pm.InverseGamma(\n",
     "            \"rho_gp\",\n",
-    "            testval=5.0,\n",
-    "            **pmx.estimate_inverse_gamma_parameters(1.0, 5.0),\n",
+    "            initval=5.0,\n",
+    "            **pmx.utils.estimate_inverse_gamma_parameters(1.0, 5.0),\n",
     "        )\n",
     "        kernel_lc = terms.SHOTerm(sigma=sigma_gp, rho=rho_gp, Q=1.0 / 3)\n",
     "\n",
     "        # Noise model for the radial velocities\n",
     "        sigma_rv1 = pm.InverseGamma(\n",
     "            \"sigma_rv1\",\n",
-    "            testval=1.0,\n",
-    "            **pmx.estimate_inverse_gamma_parameters(0.5, 5.0),\n",
+    "            initval=1.0,\n",
+    "            **pmx.utils.estimate_inverse_gamma_parameters(0.5, 5.0),\n",
     "        )\n",
     "        sigma_rv2 = pm.InverseGamma(\n",
     "            \"sigma_rv2\",\n",
-    "            testval=1.0,\n",
-    "            **pmx.estimate_inverse_gamma_parameters(0.5, 5.0),\n",
+    "            initval=1.0,\n",
+    "            **pmx.utils.estimate_inverse_gamma_parameters(0.5, 5.0),\n",
     "        )\n",
     "        sigma_rv_gp = pm.InverseGamma(\n",
     "            \"sigma_rv_gp\",\n",
-    "            testval=1.5,\n",
-    "            **pmx.estimate_inverse_gamma_parameters(1.0, 5.0),\n",
+    "            initval=1.5,\n",
+    "            **pmx.utils.estimate_inverse_gamma_parameters(1.0, 5.0),\n",
     "        )\n",
     "        rho_rv_gp = pm.InverseGamma(\n",
     "            \"rho_rv_gp\",\n",
-    "            testval=2.0,\n",
-    "            **pmx.estimate_inverse_gamma_parameters(1.0, 25.0),\n",
+    "            initval=2.0,\n",
+    "            **pmx.utils.estimate_inverse_gamma_parameters(1.0, 25.0),\n",
     "        )\n",
     "        kernel_rv = terms.SHOTerm(sigma=sigma_rv_gp, w0=rho_rv_gp, Q=1.0 / 3)\n",
     "\n",
     "        # Set up the light curve model\n",
-    "        lc = xo.SecondaryEclipseLightCurve(u1, u2, tt.exp(log_s))\n",
+    "        lc = xo.SecondaryEclipseLightCurve(u1, u2, pt.exp(log_s))\n",
     "\n",
     "        def model_lc(t):\n",
     "            return (\n",
@@ -303,45 +306,57 @@
     "            )\n",
     "\n",
     "        # Condition the light curve model on the data\n",
-    "        gp_lc = GaussianProcess(kernel_lc, t=x[mask], yerr=sigma_lc)\n",
-    "        gp_lc.marginal(\"obs_lc\", observed=y[mask] - model_lc(x[mask]))\n",
+    "        gp_lc = GaussianProcess(\n",
+    "            kernel_lc,\n",
+    "            t=x[mask],\n",
+    "            yerr=sigma_lc,\n",
+    "            mean=model_lc(x[mask]),\n",
+    "            quiet=True,\n",
+    "        )\n",
+    "        gp_lc.marginal(\"obs_lc\", observed=y[mask])\n",
     "\n",
     "        # Set up the radial velocity model\n",
     "        def model_rv1(t):\n",
     "            return mean_rv + 1e-3 * orbit.get_radial_velocity(t)\n",
     "\n",
     "        def model_rv2(t):\n",
-    "            return mean_rv - 1e-3 * orbit.get_radial_velocity(t) * tt.exp(\n",
+    "            return mean_rv - 1e-3 * orbit.get_radial_velocity(t) * pt.exp(\n",
     "                -log_q\n",
     "            )\n",
     "\n",
     "        # Condition the radial velocity model on the data\n",
-    "        gp_rv1 = GaussianProcess(kernel_rv, t=x_rv, yerr=sigma_rv1)\n",
-    "        gp_rv1.marginal(\"obs_rv1\", observed=y1_rv - model_rv1(x_rv))\n",
-    "        gp_rv2 = GaussianProcess(kernel_rv, t=x_rv, yerr=sigma_rv2)\n",
-    "        gp_rv2.marginal(\"obs_rv2\", observed=y2_rv - model_rv2(x_rv))\n",
+    "        gp_rv1 = GaussianProcess(\n",
+    "            kernel_rv, t=x_rv, yerr=sigma_rv1, mean=model_rv1(x_rv), quiet=True\n",
+    "        )\n",
+    "        gp_rv1.marginal(\"obs_rv1\", observed=y1_rv)\n",
+    "        gp_rv2 = GaussianProcess(\n",
+    "            kernel_rv, t=x_rv, yerr=sigma_rv2, mean=model_rv2(x_rv), quiet=True\n",
+    "        )\n",
+    "        gp_rv2.marginal(\"obs_rv2\", observed=y2_rv)\n",
     "\n",
     "        # Optimize the logp\n",
-    "        map_soln = model.test_point\n",
+    "        map_soln = model.initial_point()\n",
     "\n",
     "        # First the RV parameters\n",
     "        map_soln = pmx.optimize(map_soln, [mean_rv, log_q])\n",
+    "        map_soln = pmx.optimize(map_soln, [mean_rv, log_q, ecosw, esinw])\n",
+    "        # Then light curve parameters\n",
     "        map_soln = pmx.optimize(\n",
-    "            map_soln, [mean_rv, sigma_rv1, sigma_rv2, sigma_rv_gp, rho_rv_gp]\n",
-    "        )\n",
-    "\n",
-    "        # Then the LC parameters\n",
-    "        map_soln = pmx.optimize(map_soln, [mean_lc, log_R1, log_k, log_s, b])\n",
-    "        map_soln = pmx.optimize(\n",
-    "            map_soln, [mean_lc, log_R1, log_k, log_s, b, u1, u2]\n",
-    "        )\n",
-    "        map_soln = pmx.optimize(\n",
-    "            map_soln, [mean_lc, sigma_lc, sigma_gp, rho_gp]\n",
+    "            map_soln,\n",
+    "            [\n",
+    "                mean_lc,\n",
+    "                log_R1,\n",
+    "                log_k,\n",
+    "                log_s,\n",
+    "                b,\n",
+    "                u1,\n",
+    "                u2,\n",
+    "                sigma_lc,\n",
+    "                sigma_gp,\n",
+    "                rho_gp,\n",
+    "            ],\n",
     "        )\n",
-    "        map_soln = pmx.optimize(map_soln, [t0, log_period])\n",
-    "\n",
-    "        # Then all the parameters together\n",
-    "        map_soln = pmx.optimize(map_soln, [mean_rv, log_q, ecs])\n",
+    "        # Then all parameters\n",
     "        map_soln = pmx.optimize(map_soln)\n",
     "\n",
     "        extras = dict(\n",
@@ -350,7 +365,7 @@
     "            model_lc=model_lc,\n",
     "            model_rv1=model_rv1,\n",
     "            model_rv2=model_rv2,\n",
-    "            gp_lc_pred=gp_lc.predict(y[mask] - model_lc(x[mask])),\n",
+    "            gp_lc_pred=gp_lc.predict(y[mask]),\n",
     "        )\n",
     "\n",
     "    return model, map_soln, extras\n",
@@ -365,7 +380,7 @@
     "\n",
     "        with model:\n",
     "            mdl = pmx.eval_in_model(\n",
-    "                extras[\"model_lc\"](extras[\"x\"]) + extras[\"gp_lc_pred\"],\n",
+    "                extras[\"gp_lc_pred\"],\n",
     "                map_soln,\n",
     "            )\n",
     "\n",
@@ -444,7 +459,10 @@
     "t_lc_pred = np.linspace(x.min(), x.max(), 3000)\n",
     "with model:\n",
     "    gp_pred = (\n",
-    "        pmx.eval_in_model(extras[\"gp_lc_pred\"], map_soln) + map_soln[\"mean_lc\"]\n",
+    "        pmx.eval_in_model(\n",
+    "            extras[\"gp_lc_pred\"] - extras[\"model_lc\"](extras[\"x\"]), map_soln\n",
+    "        )\n",
+    "        + map_soln[\"mean_lc\"]\n",
     "    )\n",
     "    lc = (\n",
     "        pmx.eval_in_model(extras[\"model_lc\"](t_lc_pred), map_soln)\n",
@@ -523,10 +541,10 @@
    "source": [
     "np.random.seed(23642)\n",
     "with model:\n",
-    "    trace = pmx.sample(\n",
+    "    trace = pmx.utils.sample(\n",
     "        tune=1500,\n",
     "        draws=1000,\n",
-    "        start=map_soln,\n",
+    "        initvals=map_soln,\n",
     "        cores=2,\n",
     "        chains=2,\n",
     "        target_accept=0.95,\n",
@@ -551,7 +569,9 @@
    "source": [
     "import arviz as az\n",
     "\n",
-    "az.summary(trace, var_names=[\"M1\", \"M2\", \"R1\", \"R2\", \"ecs\", \"incl\", \"s\"])"
+    "az.summary(\n",
+    "    trace, var_names=[\"M1\", \"M2\", \"R1\", \"R2\", \"ecosw\", \"esinw\", \"incl\", \"s\"]\n",
+    ")"
    ]
   },
   {
@@ -576,7 +596,7 @@
     "\n",
     "_ = corner.corner(\n",
     "    trace,\n",
-    "    var_names=[\"k\", \"q\", \"ecs\"],\n",
+    "    var_names=[\"k\", \"q\", \"ecosw\", \"esinw\"],\n",
     "    labels=[\n",
     "        \"$k = R_2 / R_1$\",\n",
     "        \"$q = M_2 / M_1$\",\n",
@@ -633,7 +653,7 @@
     "\n",
     "If you looked closely at the model defined above, you might have noticed that we chose a slightly odd eccentricity prior: $p(e) \\propto e$.\n",
     "This is implied by sampling with $e\\,\\cos\\omega$ and $e\\,\\sin\\omega$ as the parameters, as has been discussed many times in the literature.\n",
-    "There are many options for correcting for this prior and instead assuming a uniform prior on eccentricity (for example, sampling with $\\sqrt{e}\\,\\cos\\omega$ and $\\sqrt{e}\\,\\sin\\omega$ as the parameters), but you'll find much worse sampling performance for this problem if you try any of these options (trust us, we tried!) because the geometry of the posterior surface becomes much less suitable for the sampling algorithm in PyMC3.\n",
+    "There are many options for correcting for this prior and instead assuming a uniform prior on eccentricity (for example, sampling with $\\sqrt{e}\\,\\cos\\omega$ and $\\sqrt{e}\\,\\sin\\omega$ as the parameters), but you'll find much worse sampling performance for this problem if you try any of these options (trust us, we tried!) because the geometry of the posterior surface becomes much less suitable for the sampling algorithm in PyMC.\n",
     "Instead, we can re-weight the samples after running the MCMC to see how the results change under the new prior.\n",
     "Most of the parameter inferences are unaffected by this change (because the data are very constraining!), but the inferred eccentricity (and especially $e\\,\\sin\\omega$) will depend on this choice.\n",
     "The following plots show how these parameter inferences are affected.\n",
diff --git a/docs/tutorials/joint.ipynb b/docs/tutorials/joint.ipynb
index 4ea367e5..104c6fc4 100644
--- a/docs/tutorials/joint.ipynb
+++ b/docs/tutorials/joint.ipynb
@@ -300,9 +300,9 @@
    "id": "4c376919",
    "metadata": {},
    "source": [
-    "## A joint transit and radial velocity model in PyMC3\n",
+    "## A joint transit and radial velocity model in PyMC\n",
     "\n",
-    "Now, let's define our full model in *PyMC3*.\n",
+    "Now, let's define our full model in *PyMC*.\n",
     "There's a lot going on here, but I've tried to comment it and most of it should be familiar from the other tutorials and case studies.\n",
     "In this case, I've put the model inside a model \"factory\" function because we'll do some sigma clipping below."
    ]
@@ -316,11 +316,11 @@
    },
    "outputs": [],
    "source": [
-    "import pymc3 as pm\n",
-    "import aesara_theano_fallback.tensor as tt\n",
+    "import pymc as pm\n",
+    "import pytensor.tensor as pt\n",
     "\n",
-    "import pymc3_ext as pmx\n",
-    "from celerite2.theano import terms, GaussianProcess\n",
+    "import pymc_ext as pmx\n",
+    "from celerite2.pymc import terms, GaussianProcess\n",
     "\n",
     "# These arrays are used as the times/phases where the models are\n",
     "# evaluated at higher resolution for plotting purposes\n",
@@ -333,21 +333,34 @@
     "        mask = np.ones(len(x), dtype=bool)\n",
     "    with pm.Model() as model:\n",
     "        # Parameters for the stellar properties\n",
-    "        mean_flux = pm.Normal(\"mean_flux\", mu=0.0, sd=10.0)\n",
-    "        u_star = xo.QuadLimbDark(\"u_star\")\n",
+    "        mean_flux = pm.Normal(\"mean_flux\", mu=0.0, sigma=10.0)\n",
+    "        u_star = xo.quad_limb_dark(\"u_star\")\n",
     "        star = xo.LimbDarkLightCurve(u_star)\n",
-    "        BoundedNormal = pm.Bound(pm.Normal, lower=0, upper=3)\n",
-    "        m_star = BoundedNormal(\n",
-    "            \"m_star\", mu=M_star_petigura[0], sd=M_star_petigura[1]\n",
+    "        # BoundedNormal = pm.Bound(pm.Normal, lower=0, upper=3)\n",
+    "\n",
+    "        m_star = pm.TruncatedNormal(\n",
+    "            \"m_star\",\n",
+    "            mu=M_star_petigura[0],\n",
+    "            sigma=M_star_petigura[1],\n",
+    "            lower=0,\n",
+    "            upper=3,\n",
     "        )\n",
-    "        r_star = BoundedNormal(\n",
-    "            \"r_star\", mu=R_star_petigura[0], sd=R_star_petigura[1]\n",
+    "        r_star = pm.TruncatedNormal(\n",
+    "            \"r_star\",\n",
+    "            mu=R_star_petigura[0],\n",
+    "            sigma=R_star_petigura[1],\n",
+    "            lower=0,\n",
+    "            upper=3,\n",
     "        )\n",
     "\n",
     "        # Orbital parameters for the planets\n",
-    "        t0 = pm.Normal(\"t0\", mu=np.array(t0s), sd=1, shape=2)\n",
-    "        log_m_pl = pm.Normal(\"log_m_pl\", mu=np.log(msini.value), sd=1, shape=2)\n",
-    "        log_period = pm.Normal(\"log_period\", mu=np.log(periods), sd=1, shape=2)\n",
+    "        t0 = pm.Normal(\"t0\", mu=np.array(t0s), sigma=1, shape=2)\n",
+    "        log_m_pl = pm.Normal(\n",
+    "            \"log_m_pl\", mu=np.log(msini.value), sigma=1, shape=2\n",
+    "        )\n",
+    "        log_period = pm.Normal(\n",
+    "            \"log_period\", mu=np.log(periods), sigma=1, shape=2\n",
+    "        )\n",
     "\n",
     "        # Fit in terms of transit depth (assuming b<1)\n",
     "        b = pm.Uniform(\"b\", lower=0, upper=1, shape=2)\n",
@@ -357,36 +370,38 @@
     "        ror = pm.Deterministic(\n",
     "            \"ror\",\n",
     "            star.get_ror_from_approx_transit_depth(\n",
-    "                1e-3 * tt.exp(log_depth), b\n",
+    "                1e-3 * pt.exp(log_depth), b\n",
     "            ),\n",
     "        )\n",
     "        r_pl = pm.Deterministic(\"r_pl\", ror * r_star)\n",
     "\n",
-    "        m_pl = pm.Deterministic(\"m_pl\", tt.exp(log_m_pl))\n",
-    "        period = pm.Deterministic(\"period\", tt.exp(log_period))\n",
+    "        m_pl = pm.Deterministic(\"m_pl\", pt.exp(log_m_pl))\n",
+    "        period = pm.Deterministic(\"period\", pt.exp(log_period))\n",
     "\n",
-    "        ecs = pmx.UnitDisk(\"ecs\", shape=(2, 2), testval=0.01 * np.ones((2, 2)))\n",
-    "        ecc = pm.Deterministic(\"ecc\", tt.sum(ecs**2, axis=0))\n",
-    "        omega = pm.Deterministic(\"omega\", tt.arctan2(ecs[1], ecs[0]))\n",
+    "        secosw, sesinw = pmx.unit_disk(\n",
+    "            \"secosw\", \"sesinw\", shape=2, initval=0.01 * np.ones((2, 2))\n",
+    "        )\n",
+    "        ecc = pm.Deterministic(\"ecc\", secosw**2 + sesinw**2)\n",
+    "        omega = pm.Deterministic(\"omega\", pt.arctan2(sesinw, secosw))\n",
     "        xo.eccentricity.vaneylen19(\n",
     "            \"ecc_prior\", multi=True, shape=2, fixed=True, observed=ecc\n",
     "        )\n",
     "\n",
     "        # RV jitter & a quadratic RV trend\n",
     "        log_sigma_rv = pm.Normal(\n",
-    "            \"log_sigma_rv\", mu=np.log(np.median(yerr_rv)), sd=5\n",
+    "            \"log_sigma_rv\", mu=np.log(np.median(yerr_rv)), sigma=5\n",
     "        )\n",
     "        trend = pm.Normal(\n",
-    "            \"trend\", mu=0, sd=10.0 ** -np.arange(3)[::-1], shape=3\n",
+    "            \"trend\", mu=0, sigma=10.0 ** -np.arange(3)[::-1], shape=3\n",
     "        )\n",
     "\n",
     "        # Transit jitter & GP parameters\n",
     "        log_sigma_lc = pm.Normal(\n",
-    "            \"log_sigma_lc\", mu=np.log(np.std(y[mask])), sd=10\n",
+    "            \"log_sigma_lc\", mu=np.log(np.std(y[mask])), sigma=10\n",
     "        )\n",
-    "        log_rho_gp = pm.Normal(\"log_rho_gp\", mu=0.0, sd=10)\n",
+    "        log_rho_gp = pm.Normal(\"log_rho_gp\", mu=0.0, sigma=10)\n",
     "        log_sigma_gp = pm.Normal(\n",
-    "            \"log_sigma_gp\", mu=np.log(np.std(y[mask])), sd=10\n",
+    "            \"log_sigma_gp\", mu=np.log(np.std(y[mask])), sigma=10\n",
     "        )\n",
     "\n",
     "        # Orbit models\n",
@@ -407,16 +422,18 @@
     "            * 1e3\n",
     "        )\n",
     "        light_curve = pm.math.sum(light_curves, axis=-1) + mean_flux\n",
-    "        resid = y[mask] - light_curve\n",
+    "        # resid = y[mask] - light_curve\n",
     "\n",
     "        # GP model for the light curve\n",
     "        kernel = terms.SHOTerm(\n",
-    "            sigma=tt.exp(log_sigma_gp),\n",
-    "            rho=tt.exp(log_rho_gp),\n",
+    "            sigma=pt.exp(log_sigma_gp),\n",
+    "            rho=pt.exp(log_rho_gp),\n",
     "            Q=1 / np.sqrt(2),\n",
     "        )\n",
-    "        gp = GaussianProcess(kernel, t=x[mask], yerr=tt.exp(log_sigma_lc))\n",
-    "        gp.marginal(\"transit_obs\", observed=resid)\n",
+    "        gp = GaussianProcess(\n",
+    "            kernel, t=x[mask], yerr=pt.exp(log_sigma_lc), mean=light_curve\n",
+    "        )\n",
+    "        gp.marginal(\"transit_obs\", observed=y[mask])\n",
     "\n",
     "        # And then include the RVs as in the RV tutorial\n",
     "        x_rv_ref = 0.5 * (x_rv.min() + x_rv.max())\n",
@@ -428,11 +445,11 @@
     "\n",
     "            # Define the background model\n",
     "            A = np.vander(t - x_rv_ref, 3)\n",
-    "            bkg = pm.Deterministic(\"bkg\" + name, tt.dot(A, trend))\n",
+    "            bkg = pm.Deterministic(\"bkg\" + name, pt.dot(A, trend))\n",
     "\n",
     "            # Sum over planets and add the background to get the full model\n",
     "            return pm.Deterministic(\n",
-    "                \"rv_model\" + name, tt.sum(vrad, axis=-1) + bkg\n",
+    "                \"rv_model\" + name, pt.sum(vrad, axis=-1) + bkg\n",
     "            )\n",
     "\n",
     "        # Define the model\n",
@@ -440,14 +457,14 @@
     "        get_rv_model(t_rv, name=\"_pred\")\n",
     "\n",
     "        # The likelihood for the RVs\n",
-    "        err = tt.sqrt(yerr_rv**2 + tt.exp(2 * log_sigma_rv))\n",
-    "        pm.Normal(\"obs\", mu=rv_model, sd=err, observed=y_rv)\n",
+    "        err = pt.sqrt(yerr_rv**2 + pt.exp(2 * log_sigma_rv))\n",
+    "        pm.Normal(\"obs\", mu=rv_model, sigma=err, observed=y_rv)\n",
     "\n",
     "        # Compute and save the phased light curve models\n",
     "        pm.Deterministic(\n",
     "            \"lc_pred\",\n",
     "            1e3\n",
-    "            * tt.stack(\n",
+    "            * pt.stack(\n",
     "                [\n",
     "                    star.get_light_curve(\n",
     "                        orbit=orbit, r=r_pl, t=t0[n] + phase_lc, texp=texp\n",
@@ -461,7 +478,7 @@
     "        # Fit for the maximum a posteriori parameters, I've found that I can get\n",
     "        # a better solution by trying different combinations of parameters in turn\n",
     "        if start is None:\n",
-    "            start = model.test_point\n",
+    "            start = model.initial_point()\n",
     "        map_soln = pmx.optimize(start=start, vars=[trend])\n",
     "        map_soln = pmx.optimize(start=map_soln, vars=[log_sigma_lc])\n",
     "        map_soln = pmx.optimize(start=map_soln, vars=[log_depth, b])\n",
@@ -475,7 +492,9 @@
     "        extras = dict(\n",
     "            zip(\n",
     "                [\"light_curves\", \"gp_pred\"],\n",
-    "                pmx.eval_in_model([light_curves, gp.predict(resid)], map_soln),\n",
+    "                pmx.eval_in_model(\n",
+    "                    [light_curves, gp.predict(y[mask])], map_soln\n",
+    "                ),\n",
     "            )\n",
     "        )\n",
     "\n",
@@ -553,7 +572,8 @@
     "\n",
     "    ax = axes[0]\n",
     "    ax.plot(x[mask], y[mask], \"k\", label=\"data\")\n",
-    "    gp_mod = extras[\"gp_pred\"] + soln[\"mean_flux\"]\n",
+    "    lc_mod = np.sum(extras[\"light_curves\"], axis=-1)\n",
+    "    gp_mod = extras[\"gp_pred\"] - lc_mod\n",
     "    ax.plot(x[mask], gp_mod, color=\"C2\", label=\"gp model\")\n",
     "    ax.legend(fontsize=10)\n",
     "    ax.set_ylabel(\"relative flux [ppt]\")\n",
@@ -567,7 +587,7 @@
     "    ax.set_ylabel(\"de-trended flux [ppt]\")\n",
     "\n",
     "    ax = axes[2]\n",
-    "    mod = gp_mod + np.sum(extras[\"light_curves\"], axis=-1)\n",
+    "    mod = gp_mod + lc_mod\n",
     "    ax.plot(x[mask], y[mask] - mod, \"k\")\n",
     "    ax.axhline(0, color=\"#aaaaaa\", lw=1)\n",
     "    ax.set_ylabel(\"residuals [ppt]\")\n",
@@ -599,11 +619,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "mod = (\n",
-    "    extras0[\"gp_pred\"]\n",
-    "    + map_soln0[\"mean_flux\"]\n",
-    "    + np.sum(extras0[\"light_curves\"], axis=-1)\n",
-    ")\n",
+    "mod = extras0[\"gp_pred\"]\n",
     "resid = y - mod\n",
     "rms = np.sqrt(np.median(resid**2))\n",
     "mask = np.abs(resid) < 7 * rms\n",
@@ -664,7 +680,7 @@
     "    trace = pm.sample(\n",
     "        tune=1500,\n",
     "        draws=1000,\n",
-    "        start=map_soln,\n",
+    "        initvals=map_soln,\n",
     "        cores=2,\n",
     "        chains=2,\n",
     "        target_accept=0.95,\n",
@@ -746,7 +762,8 @@
    "outputs": [],
    "source": [
     "flat_samps = trace.posterior.stack(sample=(\"chain\", \"draw\"))\n",
-    "gp_mod = extras[\"gp_pred\"] + map_soln[\"mean_flux\"]\n",
+    "lc_mod = np.sum(extras[\"light_curves\"], axis=-1)\n",
+    "gp_mod = extras[\"gp_pred\"] - lc_mod\n",
     "\n",
     "for n, letter in enumerate(\"bc\"):\n",
     "    plt.figure()\n",
diff --git a/docs/tutorials/lc-gp-transit.ipynb b/docs/tutorials/lc-gp-transit.ipynb
index efd1017c..de5c5d3d 100644
--- a/docs/tutorials/lc-gp-transit.ipynb
+++ b/docs/tutorials/lc-gp-transit.ipynb
@@ -131,10 +131,10 @@
    },
    "outputs": [],
    "source": [
-    "import pymc3 as pm\n",
-    "import pymc3_ext as pmx\n",
-    "import aesara_theano_fallback.tensor as tt\n",
-    "from celerite2.theano import terms, GaussianProcess\n",
+    "import pymc as pm\n",
+    "import pymc_ext as pmx\n",
+    "import pytensor.tensor as pt\n",
+    "from celerite2.pymc import terms, GaussianProcess\n",
     "\n",
     "from astropy import units as units, constants as const\n",
     "\n",
@@ -145,12 +145,12 @@
     "\n",
     "    with pm.Model() as model:\n",
     "        # Shared parameters\n",
-    "        mean = pm.Normal(\"mean\", mu=0, sd=1, testval=0)\n",
+    "        mean = pm.Normal(\"mean\", mu=0, sigma=1, initval=0)\n",
     "\n",
     "        # Stellar parameters.  These are usually determined from spectroscopy\n",
     "        # and/or isochrone fits.\n",
-    "        logg_star = pm.Normal(\"logg_star\", mu=4.53, sd=0.05)\n",
-    "        r_star = pm.Normal(\"r_star\", mu=0.881, sd=0.018)\n",
+    "        logg_star = pm.Normal(\"logg_star\", mu=4.53, sigma=0.05)\n",
+    "        r_star = pm.Normal(\"r_star\", mu=0.881, sigma=0.018)\n",
     "\n",
     "        # Here \"factor\" is defined s.t. factor * 10**logg / r_star = rho\n",
     "        factor = 5.141596357654149e-05\n",
@@ -159,7 +159,7 @@
     "        )\n",
     "\n",
     "        # Limb-darkening: adopt Kipping 2013.\n",
-    "        u_star = xo.QuadLimbDark(\"u_star\")\n",
+    "        u_star = xo.quad_limb_dark(\"u_star\")\n",
     "        star = xo.LimbDarkLightCurve(u_star)\n",
     "\n",
     "        # To get Rp/R*, fit for log(depth).  This requires an impact parameter\n",
@@ -170,7 +170,7 @@
     "        b = pm.Uniform(\"b\", lower=0, upper=1)\n",
     "\n",
     "        log_depth = pm.Normal(\"log_depth\", mu=np.log(1.8e-3), sigma=1)\n",
-    "        depth = pm.Deterministic(\"depth\", tt.exp(log_depth))\n",
+    "        depth = pm.Deterministic(\"depth\", pt.exp(log_depth))\n",
     "\n",
     "        ror = pm.Deterministic(\n",
     "            \"ror\",\n",
@@ -179,14 +179,16 @@
     "        r_pl = pm.Deterministic(\"r_pl\", ror * r_star)\n",
     "\n",
     "        # Orbital parameters for the planet.  Use mean values from Holczer+16.\n",
-    "        t0 = pm.Normal(\"t0\", mu=120.790, sd=0.02, testval=120.790)\n",
-    "        period = pm.Normal(\"period\", mu=7.203, sd=0.01, testval=7.203)\n",
+    "        t0 = pm.Normal(\"t0\", mu=120.790, sigma=0.02, initval=120.790)\n",
+    "        period = pm.Normal(\"period\", mu=7.203, sigma=0.01, initval=7.203)\n",
     "\n",
     "        # Let the eccentricity float, and use the eccentricity distribution\n",
     "        # from https://arxiv.org/abs/1807.00549 as our prior.\n",
-    "        ecs = pmx.UnitDisk(\"ecs\", testval=np.array([0.01, 0.0]))\n",
-    "        ecc = pm.Deterministic(\"ecc\", tt.sum(ecs**2))\n",
-    "        omega = pm.Deterministic(\"omega\", tt.arctan2(ecs[1], ecs[0]))\n",
+    "        secosw, sesinw = pmx.unit_disk(\n",
+    "            \"secosw\", \"sesinw\", initval=np.array([0.01, 0.0])\n",
+    "        )\n",
+    "        ecc = pm.Deterministic(\"ecc\", secosw**2 + sesinw**2)\n",
+    "        omega = pm.Deterministic(\"omega\", pt.arctan2(sesinw, secosw))\n",
     "        xo.eccentricity.vaneylen19(\n",
     "            \"ecc_prior\", multi=False, shape=1, fixed=True, observed=ecc\n",
     "        )\n",
@@ -204,7 +206,7 @@
     "            omega=omega,\n",
     "        )\n",
     "\n",
-    "        transit_model = mean + tt.sum(\n",
+    "        transit_model = mean + pt.sum(\n",
     "            star.get_light_curve(orbit=orbit, r=r_pl, t=x[mask], texp=texp),\n",
     "            axis=-1,\n",
     "        )\n",
@@ -219,17 +221,17 @@
     "        # https://gallery.exoplanet.codes/en/latest/tutorials/stellar-variability/\n",
     "\n",
     "        # A jitter term describing excess white noise\n",
-    "        log_jitter = pm.Normal(\"log_jitter\", mu=np.log(np.mean(yerr)), sd=2)\n",
+    "        log_jitter = pm.Normal(\"log_jitter\", mu=np.log(np.mean(yerr)), sigma=2)\n",
     "\n",
     "        # The parameters of the RotationTerm kernel\n",
     "        sigma_rot = pm.InverseGamma(\n",
-    "            \"sigma_rot\", **pmx.estimate_inverse_gamma_parameters(1, 5)\n",
+    "            \"sigma_rot\", **pmx.utils.estimate_inverse_gamma_parameters(1, 5)\n",
     "        )\n",
     "        # Rotation period is 2.6 days, from Lomb Scargle\n",
-    "        log_prot = pm.Normal(\"log_prot\", mu=np.log(2.606418), sd=0.02)\n",
-    "        prot = pm.Deterministic(\"prot\", tt.exp(log_prot))\n",
-    "        log_Q0 = pm.Normal(\"log_Q0\", mu=0, sd=2)\n",
-    "        log_dQ = pm.Normal(\"log_dQ\", mu=0, sd=2)\n",
+    "        log_prot = pm.Normal(\"log_prot\", mu=np.log(2.606418), sigma=0.02)\n",
+    "        prot = pm.Deterministic(\"prot\", pt.exp(log_prot))\n",
+    "        log_Q0 = pm.Normal(\"log_Q0\", mu=0, sigma=2)\n",
+    "        log_dQ = pm.Normal(\"log_dQ\", mu=0, sigma=2)\n",
     "        f = pm.Uniform(\"f\", lower=0.01, upper=1)\n",
     "\n",
     "        # Set up the Gaussian Process model. See\n",
@@ -238,8 +240,8 @@
     "        kernel = terms.RotationTerm(\n",
     "            sigma=sigma_rot,\n",
     "            period=prot,\n",
-    "            Q0=tt.exp(log_Q0),\n",
-    "            dQ=tt.exp(log_dQ),\n",
+    "            Q0=pt.exp(log_Q0),\n",
+    "            dQ=pt.exp(log_dQ),\n",
     "            f=f,\n",
     "        )\n",
     "        #\n",
@@ -250,16 +252,17 @@
     "        gp = GaussianProcess(\n",
     "            kernel,\n",
     "            t=x[mask],\n",
-    "            diag=yerr[mask] ** 2 + tt.exp(2 * log_jitter),\n",
+    "            diag=yerr[mask] ** 2 + pt.exp(2 * log_jitter),\n",
     "            quiet=True,\n",
+    "            mean=transit_model,\n",
     "        )\n",
     "\n",
     "        # Compute the Gaussian Process likelihood and add it into the\n",
-    "        # the PyMC3 model as a \"potential\"\n",
-    "        gp.marginal(\"transit_obs\", observed=y[mask] - transit_model)\n",
+    "        # the PyMC model as a \"potential\"\n",
+    "        gp.marginal(\"transit_obs\", observed=y[mask])\n",
     "\n",
     "        # Compute the GP model prediction for plotting purposes\n",
-    "        pm.Deterministic(\"gp_pred\", gp.predict(y[mask] - transit_model))\n",
+    "        pm.Deterministic(\"gp_pred\", gp.predict(y[mask], include_mean=False))\n",
     "\n",
     "        # Track planet radius in Jovian radii\n",
     "        r_planet = pm.Deterministic(\n",
@@ -269,7 +272,7 @@
     "\n",
     "        # Optimize the MAP solution.\n",
     "        if start is None:\n",
-    "            start = model.test_point\n",
+    "            start = model.initial_point()\n",
     "\n",
     "        map_soln = start\n",
     "\n",
@@ -536,7 +539,7 @@
     "        trace = pm.sample(\n",
     "            tune=1500,\n",
     "            draws=1000,\n",
-    "            start=map_estimate,\n",
+    "            initvals=map_estimate,\n",
     "            # Parallel sampling runs poorly or crashes on macos\n",
     "            cores=1 if platform.system() == \"Darwin\" else 2,\n",
     "            chains=2,\n",
diff --git a/docs/tutorials/lc-multi.ipynb b/docs/tutorials/lc-multi.ipynb
index d35ae4fb..b2a5896d 100644
--- a/docs/tutorials/lc-multi.ipynb
+++ b/docs/tutorials/lc-multi.ipynb
@@ -154,12 +154,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import pymc3 as pm\n",
-    "import pymc3_ext as pmx\n",
+    "import pymc as pm\n",
+    "import pymc_ext as pmx\n",
     "import exoplanet as xo\n",
-    "import aesara_theano_fallback.tensor as tt\n",
+    "import pytensor.tensor as pt\n",
     "from functools import partial\n",
-    "from celerite2.theano import terms, GaussianProcess\n",
+    "from celerite2.pymc import terms, GaussianProcess\n",
     "\n",
     "# Find a reference transit time near the middle of the observations to avoid\n",
     "# strong covariances between period and t0\n",
@@ -173,12 +173,12 @@
     "    with pm.Model() as model:\n",
     "        # Shared orbital parameters\n",
     "        log_period = pm.Normal(\"log_period\", mu=np.log(lit_period), sigma=1.0)\n",
-    "        period = pm.Deterministic(\"period\", tt.exp(log_period))\n",
+    "        period = pm.Deterministic(\"period\", pt.exp(log_period))\n",
     "        t0 = pm.Normal(\"t0\", mu=t0_ref, sigma=1.0)\n",
     "        log_dur = pm.Normal(\"log_dur\", mu=np.log(0.1), sigma=10.0)\n",
-    "        dur = pm.Deterministic(\"dur\", tt.exp(log_dur))\n",
-    "        b = pmx.UnitUniform(\"b\")\n",
-    "        ld_arg = 1 - tt.sqrt(1 - b**2)\n",
+    "        dur = pm.Deterministic(\"dur\", pt.exp(log_dur))\n",
+    "        b = pm.Uniform(\"b\", lower=0.0, upper=1.0)\n",
+    "        ld_arg = 1 - pt.sqrt(1 - b**2)\n",
     "        orbit = xo.orbits.KeplerianOrbit(\n",
     "            period=period, duration=dur, t0=t0, b=b\n",
     "        )\n",
@@ -186,8 +186,8 @@
     "        # We'll also say that the timescale of the GP will be shared\n",
     "        rho_gp = pm.InverseGamma(\n",
     "            \"rho_gp\",\n",
-    "            testval=2.0,\n",
-    "            **pmx.estimate_inverse_gamma_parameters(1.0, 5.0),\n",
+    "            initval=2.0,\n",
+    "            **pmx.utils.estimate_inverse_gamma_parameters(1.0, 5.0),\n",
     "        )\n",
     "\n",
     "        # Loop over the instruments\n",
@@ -203,8 +203,8 @@
     "                mean = pm.Normal(\"mean\", mu=0.0, sigma=10.0)\n",
     "\n",
     "                # The limb darkening\n",
-    "                u = xo.QuadLimbDark(\"u\")\n",
-    "                star = xo.LimbDarkLightCurve(u)\n",
+    "                u = xo.quad_limb_dark(\"u\")\n",
+    "                star = xo.LimbDarkLightCurve(*u)\n",
     "\n",
     "                # The radius ratio\n",
     "                log_approx_depth = pm.Normal(\n",
@@ -212,7 +212,7 @@
     "                )\n",
     "                ld = 1 - u[0] * ld_arg - u[1] * ld_arg**2\n",
     "                ror = pm.Deterministic(\n",
-    "                    \"ror\", tt.exp(0.5 * log_approx_depth) / tt.sqrt(ld)\n",
+    "                    \"ror\", pt.exp(0.5 * log_approx_depth) / pt.sqrt(ld)\n",
     "                )\n",
     "\n",
     "                # Noise parameters\n",
@@ -220,15 +220,15 @@
     "                std = np.std(y)\n",
     "                sigma = pm.InverseGamma(\n",
     "                    \"sigma\",\n",
-    "                    testval=med_yerr,\n",
-    "                    **pmx.estimate_inverse_gamma_parameters(\n",
+    "                    initval=med_yerr,\n",
+    "                    **pmx.utils.estimate_inverse_gamma_parameters(\n",
     "                        med_yerr, 0.5 * std\n",
     "                    ),\n",
     "                )\n",
     "                sigma_gp = pm.InverseGamma(\n",
     "                    \"sigma_gp\",\n",
-    "                    testval=0.5 * std,\n",
-    "                    **pmx.estimate_inverse_gamma_parameters(\n",
+    "                    initval=0.5 * std,\n",
+    "                    **pmx.utils.estimate_inverse_gamma_parameters(\n",
     "                        med_yerr, 0.5 * std\n",
     "                    ),\n",
     "                )\n",
@@ -239,7 +239,7 @@
     "\n",
     "            # The light curve model\n",
     "            def lc_model(mean, star, ror, texp, t):\n",
-    "                return mean + 1e3 * tt.sum(\n",
+    "                return mean + 1e3 * pt.sum(\n",
     "                    star.get_light_curve(orbit=orbit, r=ror, t=t, texp=texp),\n",
     "                    axis=-1,\n",
     "                )\n",
@@ -250,14 +250,14 @@
     "            # The Gaussian Process noise model\n",
     "            kernel = terms.SHOTerm(sigma=sigma_gp, rho=rho_gp, Q=1.0 / 3)\n",
     "            gp = GaussianProcess(\n",
-    "                kernel, t=x, diag=yerr**2 + sigma**2, mean=lc_model\n",
+    "                kernel, t=x, diag=yerr**2 + sigma**2, mean=lc_model(x)\n",
     "            )\n",
     "            gp.marginal(f\"{name}_obs\", observed=y)\n",
     "            gp_preds[name] = gp.predict(y, include_mean=False)\n",
     "            gp_preds_with_mean[name] = gp_preds[name] + gp.mean_value\n",
     "\n",
     "        # Optimize the model\n",
-    "        map_soln = model.test_point\n",
+    "        map_soln = model.initial_point()\n",
     "        for name in datasets:\n",
     "            map_soln = pmx.optimize(map_soln, parameters[name])\n",
     "        for name in datasets:\n",
@@ -323,13 +323,13 @@
     "    m = np.abs(folded) < 0.2\n",
     "    ax.plot(\n",
     "        folded[m],\n",
-    "        (y - trends[n] - map_soln[f\"{name}_mean\"])[m],\n",
+    "        (y - trends[n] - map_soln[f\"{name}::mean\"])[m],\n",
     "        \".k\",\n",
     "        alpha=0.3,\n",
     "        mec=\"none\",\n",
     "    )\n",
     "    ax.plot(\n",
-    "        dt, phase_curves[n] - map_soln[f\"{name}_mean\"], f\"C{n}\", label=name\n",
+    "        dt, phase_curves[n] - map_soln[f\"{name}::mean\"], f\"C{n}\", label=name\n",
     "    )\n",
     "    ax.annotate(\n",
     "        name,\n",
@@ -366,10 +366,10 @@
     "import platform\n",
     "\n",
     "with model:\n",
-    "    trace = pmx.sample(\n",
+    "    trace = pmx.utils.sample(\n",
     "        tune=1500,\n",
     "        draws=1500,\n",
-    "        start=map_soln,\n",
+    "        initvals=map_soln,\n",
     "        # Parallel sampling runs poorly or crashes on macos\n",
     "        cores=1 if platform.system() == \"Darwin\" else 2,\n",
     "        chains=2,\n",
@@ -416,14 +416,14 @@
    "outputs": [],
    "source": [
     "plt.hist(\n",
-    "    trace.posterior[\"Kepler_ror\"].values.flatten(),\n",
+    "    trace.posterior[\"Kepler::ror\"].values.flatten(),\n",
     "    20,\n",
     "    density=True,\n",
     "    histtype=\"step\",\n",
     "    label=\"Kepler\",\n",
     ")\n",
     "plt.hist(\n",
-    "    trace.posterior[\"TESS_ror\"].values.flatten(),\n",
+    "    trace.posterior[\"TESS::ror\"].values.flatten(),\n",
     "    20,\n",
     "    density=True,\n",
     "    histtype=\"step\",\n",
@@ -453,14 +453,14 @@
     "\n",
     "fig = corner.corner(\n",
     "    trace,\n",
-    "    var_names=[\"TESS_u\"],\n",
+    "    var_names=[\"TESS::u\"],\n",
     "    bins=40,\n",
     "    color=\"C1\",\n",
     "    range=((0.5, 0.9), (-0.5, 0.1)),\n",
     ")\n",
     "corner.corner(\n",
     "    trace,\n",
-    "    var_names=[\"Kepler_u\"],\n",
+    "    var_names=[\"Kepler::u\"],\n",
     "    bins=40,\n",
     "    color=\"C0\",\n",
     "    fig=fig,\n",
diff --git a/docs/tutorials/quick-tess.ipynb b/docs/tutorials/quick-tess.ipynb
index 2554be3e..e5b80bc6 100644
--- a/docs/tutorials/quick-tess.ipynb
+++ b/docs/tutorials/quick-tess.ipynb
@@ -151,7 +151,7 @@
     "\n",
     "## The probabilistic model\n",
     "\n",
-    "Here's how we set up the PyMC3 model in this case:"
+    "Here's how we set up the PyMC model in this case:"
    ]
   },
   {
@@ -161,17 +161,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import pymc3 as pm\n",
-    "import aesara_theano_fallback.tensor as tt\n",
+    "import pymc as pm\n",
+    "import pytensor.tensor as pt\n",
     "\n",
-    "import pymc3_ext as pmx\n",
-    "from celerite2.theano import terms, GaussianProcess\n",
+    "import pymc_ext as pmx\n",
+    "from celerite2.pymc import terms, GaussianProcess\n",
     "\n",
     "\n",
     "with pm.Model() as model:\n",
     "    # Stellar parameters\n",
     "    mean = pm.Normal(\"mean\", mu=0.0, sigma=10.0)\n",
-    "    u = xo.QuadLimbDark(\"u\")\n",
+    "    u = xo.quad_limb_dark(\"u\")\n",
     "    star_params = [mean, u]\n",
     "\n",
     "    # Gaussian process noise model\n",
@@ -179,7 +179,7 @@
     "    log_sigma_gp = pm.Normal(\"log_sigma_gp\", mu=0.0, sigma=10.0)\n",
     "    log_rho_gp = pm.Normal(\"log_rho_gp\", mu=np.log(10.0), sigma=10.0)\n",
     "    kernel = terms.SHOTerm(\n",
-    "        sigma=tt.exp(log_sigma_gp), rho=tt.exp(log_rho_gp), Q=1.0 / 3\n",
+    "        sigma=pt.exp(log_sigma_gp), rho=pt.exp(log_rho_gp), Q=1.0 / 3\n",
     "    )\n",
     "    noise_params = [sigma, log_sigma_gp, log_rho_gp]\n",
     "\n",
@@ -187,37 +187,39 @@
     "    log_ror = pm.Normal(\n",
     "        \"log_ror\", mu=0.5 * np.log(depth_guess * 1e-3), sigma=10.0\n",
     "    )\n",
-    "    ror = pm.Deterministic(\"ror\", tt.exp(log_ror))\n",
+    "    ror = pm.Deterministic(\"ror\", pt.exp(log_ror))\n",
     "\n",
     "    # Orbital parameters\n",
     "    log_period = pm.Normal(\"log_period\", mu=np.log(period_guess), sigma=1.0)\n",
-    "    period = pm.Deterministic(\"period\", tt.exp(log_period))\n",
+    "    period = pm.Deterministic(\"period\", pt.exp(log_period))\n",
     "    t0 = pm.Normal(\"t0\", mu=t0_guess, sigma=1.0)\n",
     "    log_dur = pm.Normal(\"log_dur\", mu=np.log(0.1), sigma=10.0)\n",
-    "    dur = pm.Deterministic(\"dur\", tt.exp(log_dur))\n",
-    "    b = xo.distributions.ImpactParameter(\"b\", ror=ror)\n",
+    "    dur = pm.Deterministic(\"dur\", pt.exp(log_dur))\n",
+    "    b = xo.distributions.impact_parameter(\"b\", ror=ror)\n",
     "\n",
     "    # Set up the orbit\n",
-    "    orbit = xo.orbits.KeplerianOrbit(period=period, duration=dur, t0=t0, b=b)\n",
+    "    orbit = xo.orbits.KeplerianOrbit(\n",
+    "        period=period, duration=dur, t0=t0, b=b, ror=ror\n",
+    "    )\n",
     "\n",
     "    # We're going to track the implied density for reasons that will become clear later\n",
     "    pm.Deterministic(\"rho_circ\", orbit.rho_star)\n",
     "\n",
     "    # Set up the mean transit model\n",
     "    star = xo.LimbDarkLightCurve(u)\n",
-    "    lc_model = mean + 1e3 * tt.sum(\n",
+    "    lc_model = mean + 1e3 * pt.sum(\n",
     "        star.get_light_curve(orbit=orbit, r=ror, t=x), axis=-1\n",
     "    )\n",
     "\n",
     "    # Finally the GP observation model\n",
-    "    gp = GaussianProcess(kernel, t=x, diag=yerr**2 + sigma**2)\n",
-    "    gp.marginal(\"obs\", observed=y - lc_model)\n",
+    "    gp = GaussianProcess(kernel, t=x, diag=yerr**2 + sigma**2, mean=lc_model)\n",
+    "    gp.marginal(\"obs\", observed=y)\n",
     "\n",
     "    # Double check that everything looks good - we shouldn't see any NaNs!\n",
-    "    print(model.check_test_point())\n",
+    "    print(model.point_logps())\n",
     "\n",
     "    # Optimize the model\n",
-    "    map_soln = model.test_point\n",
+    "    map_soln = model.initial_point()\n",
     "    map_soln = pmx.optimize(map_soln, [sigma])\n",
     "    map_soln = pmx.optimize(map_soln, [ror, b, dur])\n",
     "    map_soln = pmx.optimize(map_soln, noise_params)\n",
@@ -242,7 +244,7 @@
    "source": [
     "with model:\n",
     "    lc_pred = pmx.eval_in_model(lc_model, map_soln)\n",
-    "    gp_pred = pmx.eval_in_model(gp.predict(y - lc_pred), map_soln)\n",
+    "    gp_pred = pmx.eval_in_model(gp.predict(y), map_soln) - lc_pred\n",
     "\n",
     "plt.figure(figsize=(8, 4))\n",
     "x_fold = (x - map_soln[\"t0\"] + 0.5 * map_soln[\"period\"]) % map_soln[\n",
@@ -275,10 +277,10 @@
    "outputs": [],
    "source": [
     "with model:\n",
-    "    trace = pmx.sample(\n",
+    "    trace = pmx.utils.sample(\n",
     "        tune=1000,\n",
     "        draws=1000,\n",
-    "        start=map_soln,\n",
+    "        initvals=map_soln,\n",
     "        chains=2,\n",
     "        cores=2,\n",
     "        return_inferencedata=True,\n",
diff --git a/docs/tutorials/rv-multi.ipynb b/docs/tutorials/rv-multi.ipynb
index b5a9f440..e9de1f69 100644
--- a/docs/tutorials/rv-multi.ipynb
+++ b/docs/tutorials/rv-multi.ipynb
@@ -28,9 +28,9 @@
    "id": "7728bae8-f878-4ed8-8a14-c0dfd78b4b33",
    "metadata": {},
    "source": [
-    "In this case study, we will look at how we can use exoplanet and PyMC3 to combine datasets from different RV instruments to fit the orbit of an exoplanet system.\n",
-    "Before getting started, I want to emphasize that the exoplanet code doesn't have strong opinions about how your data are collected, it only provides extensions that allow PyMC3 to evaluate some astronomy-specific functions.\n",
-    "This means that you can build any kind of observation model that PyMC3 supports, and support for multiple instruments isn't really a *feature* of exoplanet, even though it is easy to implement.\n",
+    "In this case study, we will look at how we can use exoplanet and PyMC to combine datasets from different RV instruments to fit the orbit of an exoplanet system.\n",
+    "Before getting started, I want to emphasize that the exoplanet code doesn't have strong opinions about how your data are collected, it only provides extensions that allow PyMC to evaluate some astronomy-specific functions.\n",
+    "This means that you can build any kind of observation model that PyMC supports, and support for multiple instruments isn't really a *feature* of exoplanet, even though it is easy to implement.\n",
     "\n",
     "For the example, we'll use public observations of Pi Mensae which hosts two planets, but we'll ignore the inner planet because the significance of the RV signal is small enough that it won't affect our results.\n",
     "The datasets that we'll use are from the Anglo-Australian Planet Search (AAT) and the HARPS archive.\n",
@@ -110,12 +110,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import pymc3 as pm\n",
+    "import pymc as pm\n",
     "import exoplanet as xo\n",
-    "import aesara_theano_fallback.tensor as tt\n",
+    "import pytensor.tensor as pt\n",
     "\n",
-    "import pymc3_ext as pmx\n",
-    "from celerite2.theano import terms, GaussianProcess\n",
+    "import pymc_ext as pmx\n",
+    "from celerite2.pymc import terms, GaussianProcess\n",
     "\n",
     "t_phase = np.linspace(-0.5, 0.5, 5000)\n",
     "\n",
@@ -123,13 +123,15 @@
     "    # Parameters describing the orbit\n",
     "    log_K = pm.Normal(\"log_K\", mu=np.log(300), sigma=10)\n",
     "    log_P = pm.Normal(\"log_P\", mu=np.log(2093.07), sigma=10)\n",
-    "    K = pm.Deterministic(\"K\", tt.exp(log_K))\n",
-    "    P = pm.Deterministic(\"P\", tt.exp(log_P))\n",
+    "    K = pm.Deterministic(\"K\", pt.exp(log_K))\n",
+    "    P = pm.Deterministic(\"P\", pt.exp(log_P))\n",
     "\n",
-    "    ecs = pmx.UnitDisk(\"ecs\", testval=np.array([0.7, -0.3]))\n",
-    "    ecc = pm.Deterministic(\"ecc\", tt.sum(ecs**2))\n",
-    "    omega = pm.Deterministic(\"omega\", tt.arctan2(ecs[1], ecs[0]))\n",
-    "    phase = pmx.UnitUniform(\"phase\")\n",
+    "    secosw, sesinw = pmx.unit_disk(\n",
+    "        \"secosw\", \"sesinw\", initval=np.array([0.7, -0.3])\n",
+    "    )\n",
+    "    ecc = pm.Deterministic(\"ecc\", secosw**2 + sesinw**2)\n",
+    "    omega = pm.Deterministic(\"omega\", pt.arctan2(sesinw, secosw))\n",
+    "    phase = pm.Uniform(\"phase\", lower=0, upper=1)\n",
     "    tp = pm.Deterministic(\"tp\", 0.5 * (t.min() + t.max()) + phase * P)\n",
     "\n",
     "    orbit = xo.orbits.KeplerianOrbit(\n",
@@ -150,31 +152,30 @@
     "    sigmas = pm.HalfNormal(\"sigmas\", sigma=10, shape=num_inst)\n",
     "\n",
     "    # Compute the RV offset and jitter for each data point depending on its instrument\n",
-    "    mean = tt.zeros(len(t))\n",
-    "    diag = tt.zeros(len(t))\n",
+    "    mean = pt.zeros(len(t))\n",
+    "    diag = pt.zeros(len(t))\n",
     "    for i in range(len(inst_names)):\n",
     "        mean += means[i] * (inst_id == i)\n",
     "        diag += (rv_err**2 + sigmas[i] ** 2) * (inst_id == i)\n",
     "    pm.Deterministic(\"mean\", mean)\n",
     "    pm.Deterministic(\"diag\", diag)\n",
-    "    resid = rv - mean\n",
     "\n",
     "    def rv_model(x):\n",
     "        return orbit.get_radial_velocity(x, K=K)\n",
     "\n",
     "    kernel = terms.SHOTerm(\n",
-    "        sigma=tt.exp(log_sigma_gp), rho=tt.exp(log_rho_gp), Q=1.0 / 3\n",
+    "        sigma=pt.exp(log_sigma_gp), rho=pt.exp(log_rho_gp), Q=1.0 / 3\n",
     "    )\n",
-    "    gp = GaussianProcess(kernel, t=t, diag=diag, mean=rv_model)\n",
-    "    gp.marginal(\"obs\", observed=resid)\n",
-    "    pm.Deterministic(\"gp_pred\", gp.predict(resid, include_mean=False))\n",
+    "    gp = GaussianProcess(kernel, t=t, diag=diag, mean=rv_model(t) + mean)\n",
+    "    gp.marginal(\"obs\", observed=rv)\n",
+    "    pm.Deterministic(\"gp_pred\", gp.predict(rv, include_mean=False))\n",
     "    pm.Deterministic(\"rv_phase\", rv_model(P * t_phase + tp))\n",
     "\n",
-    "    map_soln = model.test_point\n",
+    "    map_soln = model.initial_point()\n",
     "    map_soln = pmx.optimize(map_soln, [means])\n",
     "    map_soln = pmx.optimize(map_soln, [means, phase])\n",
     "    map_soln = pmx.optimize(map_soln, [means, phase, log_K])\n",
-    "    map_soln = pmx.optimize(map_soln, [means, tp, K, log_P, ecs])\n",
+    "    map_soln = pmx.optimize(map_soln, [means, tp, K, log_P, secosw, sesinw])\n",
     "    map_soln = pmx.optimize(map_soln, [sigmas, log_sigma_gp, log_rho_gp])\n",
     "    map_soln = pmx.optimize(map_soln)"
    ]
@@ -227,10 +228,10 @@
    "outputs": [],
    "source": [
     "with model:\n",
-    "    trace = pmx.sample(\n",
+    "    trace = pmx.utils.sample(\n",
     "        tune=1000,\n",
     "        draws=1000,\n",
-    "        start=map_soln,\n",
+    "        initvals=map_soln,\n",
     "        chains=2,\n",
     "        cores=2,\n",
     "        return_inferencedata=True,\n",
diff --git a/docs/tutorials/rv.ipynb b/docs/tutorials/rv.ipynb
index 5cfbceea..b6d87c1a 100644
--- a/docs/tutorials/rv.ipynb
+++ b/docs/tutorials/rv.ipynb
@@ -116,9 +116,9 @@
    "id": "7f817fba",
    "metadata": {},
    "source": [
-    "## The radial velocity model in PyMC3\n",
+    "## The radial velocity model in PyMC\n",
     "\n",
-    "Now that we have the data and an estimate of the initial values for the parameters, let's start defining the probabilistic model in PyMC3.\n",
+    "Now that we have the data and an estimate of the initial values for the parameters, let's start defining the probabilistic model in PyMC.\n",
     "First, we'll define our priors on the parameters:"
    ]
   },
@@ -129,38 +129,42 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import pymc3 as pm\n",
-    "import pymc3_ext as pmx\n",
-    "import aesara_theano_fallback.tensor as tt\n",
+    "import pymc as pm\n",
+    "import pymc_ext as pmx\n",
+    "import pytensor.tensor as pt\n",
     "\n",
     "with pm.Model() as model:\n",
     "    # Gaussian priors based on transit data (from Petigura et al.)\n",
-    "    t0 = pm.Normal(\"t0\", mu=np.array(t0s), sd=np.array(t0_errs), shape=2)\n",
+    "    t0 = pm.Normal(\"t0\", mu=np.array(t0s), sigma=np.array(t0_errs), shape=2)\n",
     "    logP = pm.Normal(\n",
     "        \"logP\",\n",
     "        mu=np.log(periods),\n",
-    "        sd=np.array(period_errs) / np.array(periods),\n",
+    "        sigma=np.array(period_errs) / np.array(periods),\n",
     "        shape=2,\n",
-    "        testval=np.log(periods),\n",
+    "        initval=np.log(periods),\n",
     "    )\n",
-    "    P = pm.Deterministic(\"P\", tt.exp(logP))\n",
+    "    P = pm.Deterministic(\"P\", pt.exp(logP))\n",
     "\n",
     "    # Wide log-normal prior for semi-amplitude\n",
     "    logK = pm.Normal(\n",
-    "        \"logK\", mu=np.log(Ks), sd=2.0, shape=2, testval=np.log(Ks)\n",
+    "        \"logK\", mu=np.log(Ks), sigma=2.0, shape=2, initval=np.log(Ks)\n",
     "    )\n",
     "\n",
     "    # Eccentricity & argument of periasteron\n",
-    "    ecs = pmx.UnitDisk(\"ecs\", shape=(2, 2), testval=0.01 * np.ones((2, 2)))\n",
-    "    ecc = pm.Deterministic(\"ecc\", tt.sum(ecs**2, axis=0))\n",
-    "    omega = pm.Deterministic(\"omega\", tt.arctan2(ecs[1], ecs[0]))\n",
+    "    secosw, sesinw = pmx.unit_disk(\n",
+    "        \"secosw\", \"sesinw\", shape=2, initval=0.01 * np.ones((2, 2))\n",
+    "    )\n",
+    "    ecc = pm.Deterministic(\"ecc\", secosw**2 + sesinw**2)\n",
+    "    omega = pm.Deterministic(\"omega\", pt.arctan2(sesinw, secosw))\n",
     "    xo.eccentricity.vaneylen19(\n",
     "        \"ecc_prior\", multi=True, shape=2, fixed=True, observed=ecc\n",
     "    )\n",
     "\n",
     "    # Jitter & a quadratic RV trend\n",
-    "    logs = pm.Normal(\"logs\", mu=np.log(np.median(yerr)), sd=5.0)\n",
-    "    trend = pm.Normal(\"trend\", mu=0, sd=10.0 ** -np.arange(3)[::-1], shape=3)\n",
+    "    logs = pm.Normal(\"logs\", mu=np.log(np.median(yerr)), sigma=5.0)\n",
+    "    trend = pm.Normal(\n",
+    "        \"trend\", mu=0, sigma=10.0 ** -np.arange(3)[::-1], shape=3\n",
+    "    )\n",
     "\n",
     "    # Then we define the orbit\n",
     "    orbit = xo.orbits.KeplerianOrbit(period=P, t0=t0, ecc=ecc, omega=omega)\n",
@@ -168,15 +172,15 @@
     "    # And a function for computing the full RV model\n",
     "    def get_rv_model(t, name=\"\"):\n",
     "        # First the RVs induced by the planets\n",
-    "        vrad = orbit.get_radial_velocity(t, K=tt.exp(logK))\n",
+    "        vrad = orbit.get_radial_velocity(t, K=pt.exp(logK))\n",
     "        pm.Deterministic(\"vrad\" + name, vrad)\n",
     "\n",
     "        # Define the background model\n",
     "        A = np.vander(t - x_ref, 3)\n",
-    "        bkg = pm.Deterministic(\"bkg\" + name, tt.dot(A, trend))\n",
+    "        bkg = pm.Deterministic(\"bkg\" + name, pt.dot(A, trend))\n",
     "\n",
     "        # Sum over planets and add the background to get the full model\n",
-    "        return pm.Deterministic(\"rv_model\" + name, tt.sum(vrad, axis=-1) + bkg)\n",
+    "        return pm.Deterministic(\"rv_model\" + name, pt.sum(vrad, axis=-1) + bkg)\n",
     "\n",
     "    # Define the RVs at the observed times\n",
     "    rv_model = get_rv_model(x)\n",
@@ -185,9 +189,9 @@
     "    rv_model_pred = get_rv_model(t, name=\"_pred\")\n",
     "\n",
     "    # Finally add in the observation model. This next line adds a new contribution\n",
-    "    # to the log probability of the PyMC3 model\n",
-    "    err = tt.sqrt(yerr**2 + tt.exp(2 * logs))\n",
-    "    pm.Normal(\"obs\", mu=rv_model, sd=err, observed=y)"
+    "    # to the log probability of the PyMC model\n",
+    "    err = pt.sqrt(yerr**2 + pt.exp(2 * logs))\n",
+    "    pm.Normal(\"obs\", mu=rv_model, sigma=err, observed=y)"
    ]
   },
   {
@@ -237,9 +241,9 @@
    "outputs": [],
    "source": [
     "with model:\n",
-    "    map_soln = pmx.optimize(start=model.test_point, vars=[trend])\n",
+    "    map_soln = pmx.optimize(start=model.initial_point(), vars=[trend])\n",
     "    map_soln = pmx.optimize(start=map_soln, vars=[t0, trend, logK, logP, logs])\n",
-    "    map_soln = pmx.optimize(start=map_soln, vars=[ecs])\n",
+    "    map_soln = pmx.optimize(start=map_soln, vars=[secosw, sesinw])\n",
     "    map_soln = pmx.optimize(start=map_soln)"
    ]
   },
@@ -272,7 +276,7 @@
     "## Sampling\n",
     "\n",
     "Now that we have our model set up and a good estimate of the initial parameters, let's start sampling.\n",
-    "There are substantial covariances between some of the parameters so we'll use the `pmx.sample` function from [pymc3-ext](https://github.com/exoplanet-dev/pymc3-ext) which wraps `pm.sample` function with some better defaults and tuning strategies."
+    "There are substantial covariances between some of the parameters so we'll use the `pmx.utils.sample` function from [pymc-ext](https://github.com/exoplanet-dev/pymc-ext) which wraps `pm.sample` function with some better defaults and tuning strategies."
    ]
   },
   {
@@ -284,7 +288,7 @@
    "source": [
     "np.random.seed(42)\n",
     "with model:\n",
-    "    trace = pmx.sample(\n",
+    "    trace = pmx.utils.sample(\n",
     "        tune=1000,\n",
     "        draws=1000,\n",
     "        cores=2,\n",
@@ -313,7 +317,18 @@
     "import arviz as az\n",
     "\n",
     "az.summary(\n",
-    "    trace, var_names=[\"trend\", \"logs\", \"omega\", \"ecc\", \"t0\", \"logK\", \"P\"]\n",
+    "    trace,\n",
+    "    var_names=[\n",
+    "        \"trend\",\n",
+    "        \"logs\",\n",
+    "        \"omega\",\n",
+    "        \"ecc\",\n",
+    "        \"t0\",\n",
+    "        \"logK\",\n",
+    "        \"P\",\n",
+    "        \"secosw\",\n",
+    "        \"sesinw\",\n",
+    "    ],\n",
     ")"
    ]
   },
@@ -346,7 +361,7 @@
    "id": "856086dd",
    "metadata": {},
    "source": [
-    "Finally, let's plot the plosterior constraints on the RV model and compare those to the data:"
+    "Finally, let's plot the posterior constraints on the RV model and compare those to the data:"
    ]
   },
   {
@@ -473,6 +488,14 @@
   }
  ],
  "metadata": {
+  "jupytext": {
+   "text_repesentation": {
+    "extension": ".py",
+    "format_name": "percent",
+    "format_version": "1.3",
+    "jupytext_version": "1.15.2"
+   }
+  },
   "kernelspec": {
    "display_name": "Python 3",
    "language": "python",
diff --git a/docs/tutorials/stellar-variability.ipynb b/docs/tutorials/stellar-variability.ipynb
index 9febaa6b..2f75c13d 100644
--- a/docs/tutorials/stellar-variability.ipynb
+++ b/docs/tutorials/stellar-variability.ipynb
@@ -29,7 +29,7 @@
    "metadata": {},
    "source": [
     "When fitting exoplanets, we also need to fit for the stellar variability and Gaussian Processes (GPs) are often a good descriptive model for this variation.\n",
-    "[PyMC3 has support for all sorts of general GP models](https://docs.pymc.io/gp.html), but *exoplanet* interfaces with the [celerite2](https://celerite2.readthedocs.io/) library to provide support for scalable 1D GPs (take a look at the [Getting started](https://celerite2.readthedocs.io/en/latest/tutorials/first/) tutorial on the *celerite2* docs for a crash course) that can work with large datasets.\n",
+    "[PyMC has support for all sorts of general GP models](https://docs.pymc.io/gp.html), but *exoplanet* interfaces with the [celerite2](https://celerite2.readthedocs.io/) library to provide support for scalable 1D GPs (take a look at the [Getting started](https://celerite2.readthedocs.io/en/latest/tutorials/first/) tutorial on the *celerite2* docs for a crash course) that can work with large datasets.\n",
     "In this tutorial, we go through the process of modeling the light curve of a rotating star observed by Kepler using *exoplanet* and *celerite2*.\n",
     "\n",
     "First, let's download and plot the data:"
@@ -119,10 +119,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import pymc3 as pm\n",
-    "import pymc3_ext as pmx\n",
-    "import aesara_theano_fallback.tensor as tt\n",
-    "from celerite2.theano import terms, GaussianProcess\n",
+    "import pymc as pm\n",
+    "import pymc_ext as pmx\n",
+    "import pytensor.tensor as pt\n",
+    "from celerite2.pymc import terms, GaussianProcess\n",
     "\n",
     "with pm.Model() as model:\n",
     "    # The mean flux of the time series\n",
@@ -133,18 +133,18 @@
     "\n",
     "    # A term to describe the non-periodic variability\n",
     "    sigma = pm.InverseGamma(\n",
-    "        \"sigma\", **pmx.estimate_inverse_gamma_parameters(1.0, 5.0)\n",
+    "        \"sigma\", **pmx.utils.estimate_inverse_gamma_parameters(1.0, 5.0)\n",
     "    )\n",
     "    rho = pm.InverseGamma(\n",
-    "        \"rho\", **pmx.estimate_inverse_gamma_parameters(0.5, 2.0)\n",
+    "        \"rho\", **pmx.utils.estimate_inverse_gamma_parameters(0.5, 2.0)\n",
     "    )\n",
     "\n",
     "    # The parameters of the RotationTerm kernel\n",
     "    sigma_rot = pm.InverseGamma(\n",
-    "        \"sigma_rot\", **pmx.estimate_inverse_gamma_parameters(1.0, 5.0)\n",
+    "        \"sigma_rot\", **pmx.utils.estimate_inverse_gamma_parameters(1.0, 5.0)\n",
     "    )\n",
     "    log_period = pm.Normal(\"log_period\", mu=np.log(peak[\"period\"]), sigma=2.0)\n",
-    "    period = pm.Deterministic(\"period\", tt.exp(log_period))\n",
+    "    period = pm.Deterministic(\"period\", pt.exp(log_period))\n",
     "    log_Q0 = pm.HalfNormal(\"log_Q0\", sigma=2.0)\n",
     "    log_dQ = pm.Normal(\"log_dQ\", mu=0.0, sigma=2.0)\n",
     "    f = pm.Uniform(\"f\", lower=0.1, upper=1.0)\n",
@@ -154,20 +154,20 @@
     "    kernel += terms.RotationTerm(\n",
     "        sigma=sigma_rot,\n",
     "        period=period,\n",
-    "        Q0=tt.exp(log_Q0),\n",
-    "        dQ=tt.exp(log_dQ),\n",
+    "        Q0=pt.exp(log_Q0),\n",
+    "        dQ=pt.exp(log_dQ),\n",
     "        f=f,\n",
     "    )\n",
     "    gp = GaussianProcess(\n",
     "        kernel,\n",
     "        t=x,\n",
-    "        diag=yerr**2 + tt.exp(2 * log_jitter),\n",
+    "        diag=yerr**2 + pt.exp(2 * log_jitter),\n",
     "        mean=mean,\n",
     "        quiet=True,\n",
     "    )\n",
     "\n",
     "    # Compute the Gaussian Process likelihood and add it into the\n",
-    "    # the PyMC3 model as a \"potential\"\n",
+    "    # the PyMC model as a \"potential\"\n",
     "    gp.marginal(\"gp\", observed=y)\n",
     "\n",
     "    # Compute the mean model prediction for plotting purposes\n",
@@ -207,7 +207,7 @@
    "metadata": {},
    "source": [
     "That looks pretty good!\n",
-    "Now let's sample from the posterior using [the PyMC3 Extras (pymc3-ext) library](https://github.com/exoplanet-dev/pymc3-ext):"
+    "Now let's sample from the posterior using [the PyMC Extras (pymc-ext) library](https://github.com/exoplanet-dev/pymc-ext):"
    ]
   },
   {
@@ -218,10 +218,10 @@
    "outputs": [],
    "source": [
     "with model:\n",
-    "    trace = pmx.sample(\n",
+    "    trace = pmx.utils.sample(\n",
     "        tune=1000,\n",
     "        draws=1000,\n",
-    "        start=map_soln,\n",
+    "        initvals=map_soln,\n",
     "        cores=2,\n",
     "        chains=2,\n",
     "        target_accept=0.9,\n",
diff --git a/docs/tutorials/tess.ipynb b/docs/tutorials/tess.ipynb
index 7bffef0f..14cf98ec 100644
--- a/docs/tutorials/tess.ipynb
+++ b/docs/tutorials/tess.ipynb
@@ -150,7 +150,7 @@
    "id": "287e3e93",
    "metadata": {},
    "source": [
-    "## The transit model in PyMC3\n",
+    "## The transit model in PyMC\n",
     "\n",
     "The transit model, initialization, and sampling are all nearly the same as the one in {ref}`joint`."
    ]
@@ -165,11 +165,11 @@
    "outputs": [],
    "source": [
     "import exoplanet as xo\n",
-    "import pymc3 as pm\n",
-    "import aesara_theano_fallback.tensor as tt\n",
+    "import pymc as pm\n",
+    "import pytensor.tensor as pt\n",
     "\n",
-    "import pymc3_ext as pmx\n",
-    "from celerite2.theano import terms, GaussianProcess\n",
+    "import pymc_ext as pmx\n",
+    "from celerite2.pymc import terms, GaussianProcess\n",
     "\n",
     "phase_lc = np.linspace(-0.3, 0.3, 100)\n",
     "\n",
@@ -179,25 +179,32 @@
     "        mask = np.ones(len(x), dtype=bool)\n",
     "    with pm.Model() as model:\n",
     "        # Parameters for the stellar properties\n",
-    "        mean = pm.Normal(\"mean\", mu=0.0, sd=10.0)\n",
-    "        u_star = xo.QuadLimbDark(\"u_star\")\n",
+    "        mean = pm.Normal(\"mean\", mu=0.0, sigma=10.0)\n",
+    "        u_star = xo.quad_limb_dark(\"u_star\")\n",
     "        star = xo.LimbDarkLightCurve(u_star)\n",
     "\n",
     "        # Stellar parameters from Huang et al (2018)\n",
     "        M_star_huang = 1.094, 0.039\n",
     "        R_star_huang = 1.10, 0.023\n",
-    "        BoundedNormal = pm.Bound(pm.Normal, lower=0, upper=3)\n",
-    "        m_star = BoundedNormal(\n",
-    "            \"m_star\", mu=M_star_huang[0], sd=M_star_huang[1]\n",
+    "        m_star = pm.TruncatedNormal(\n",
+    "            \"m_star\",\n",
+    "            mu=M_star_huang[0],\n",
+    "            sigma=M_star_huang[1],\n",
+    "            lower=0,\n",
+    "            upper=3,\n",
     "        )\n",
-    "        r_star = BoundedNormal(\n",
-    "            \"r_star\", mu=R_star_huang[0], sd=R_star_huang[1]\n",
+    "        r_star = pm.TruncatedNormal(\n",
+    "            \"r_star\",\n",
+    "            mu=R_star_huang[0],\n",
+    "            sigma=R_star_huang[1],\n",
+    "            lower=0,\n",
+    "            upper=3,\n",
     "        )\n",
     "\n",
     "        # Orbital parameters for the planets\n",
-    "        t0 = pm.Normal(\"t0\", mu=bls_t0, sd=1)\n",
-    "        log_period = pm.Normal(\"log_period\", mu=np.log(bls_period), sd=1)\n",
-    "        period = pm.Deterministic(\"period\", tt.exp(log_period))\n",
+    "        t0 = pm.Normal(\"t0\", mu=bls_t0, sigma=1)\n",
+    "        log_period = pm.Normal(\"log_period\", mu=np.log(bls_period), sigma=1)\n",
+    "        period = pm.Deterministic(\"period\", pt.exp(log_period))\n",
     "\n",
     "        # Fit in terms of transit depth (assuming b<1)\n",
     "        b = pm.Uniform(\"b\", lower=0, upper=1)\n",
@@ -205,33 +212,35 @@
     "        ror = pm.Deterministic(\n",
     "            \"ror\",\n",
     "            star.get_ror_from_approx_transit_depth(\n",
-    "                1e-3 * tt.exp(log_depth), b\n",
+    "                1e-3 * pt.exp(log_depth), b\n",
     "            ),\n",
     "        )\n",
     "        r_pl = pm.Deterministic(\"r_pl\", ror * r_star)\n",
     "\n",
     "        #         log_r_pl = pm.Normal(\n",
     "        #             \"log_r_pl\",\n",
-    "        #             sd=1.0,\n",
+    "        #             sigma=1.0,\n",
     "        #             mu=0.5 * np.log(1e-3 * np.array(bls_depth))\n",
     "        #             + np.log(R_star_huang[0]),\n",
     "        #         )\n",
-    "        #         r_pl = pm.Deterministic(\"r_pl\", tt.exp(log_r_pl))\n",
+    "        #         r_pl = pm.Deterministic(\"r_pl\", pt.exp(log_r_pl))\n",
     "        #         ror = pm.Deterministic(\"ror\", r_pl / r_star)\n",
-    "        #         b = xo.distributions.ImpactParameter(\"b\", ror=ror)\n",
+    "        #         b = xo.distributions.impact_parameter(\"b\", ror=ror)\n",
     "\n",
-    "        ecs = pmx.UnitDisk(\"ecs\", testval=np.array([0.01, 0.0]))\n",
-    "        ecc = pm.Deterministic(\"ecc\", tt.sum(ecs**2))\n",
-    "        omega = pm.Deterministic(\"omega\", tt.arctan2(ecs[1], ecs[0]))\n",
+    "        secosw, sesinw = xo.unit_disk(\n",
+    "            \"secosw\", \"sesinw\", initval=np.array([0.01, 0.0])\n",
+    "        )\n",
+    "        ecc = pm.Deterministic(\"ecc\", secosw**2 + sesinw**2)\n",
+    "        omega = pm.Deterministic(\"omega\", pt.arctan2(sesinw, secosw))\n",
     "        xo.eccentricity.kipping13(\"ecc_prior\", fixed=True, observed=ecc)\n",
     "\n",
     "        # Transit jitter & GP parameters\n",
     "        log_sigma_lc = pm.Normal(\n",
-    "            \"log_sigma_lc\", mu=np.log(np.std(y[mask])), sd=10\n",
+    "            \"log_sigma_lc\", mu=np.log(np.std(y[mask])), sigma=10\n",
     "        )\n",
-    "        log_rho_gp = pm.Normal(\"log_rho_gp\", mu=0, sd=10)\n",
+    "        log_rho_gp = pm.Normal(\"log_rho_gp\", mu=0, sigma=10)\n",
     "        log_sigma_gp = pm.Normal(\n",
-    "            \"log_sigma_gp\", mu=np.log(np.std(y[mask])), sd=10\n",
+    "            \"log_sigma_gp\", mu=np.log(np.std(y[mask])), sigma=10\n",
     "        )\n",
     "\n",
     "        # Orbit model\n",
@@ -250,18 +259,19 @@
     "            star.get_light_curve(orbit=orbit, r=r_pl, t=x[mask], texp=texp)\n",
     "            * 1e3\n",
     "        )\n",
-    "        light_curve = tt.sum(light_curves, axis=-1) + mean\n",
-    "        resid = y[mask] - light_curve\n",
+    "        light_curve = pt.sum(light_curves, axis=-1) + mean\n",
     "\n",
     "        # GP model for the light curve\n",
     "        kernel = terms.SHOTerm(\n",
-    "            sigma=tt.exp(log_sigma_gp),\n",
-    "            rho=tt.exp(log_rho_gp),\n",
+    "            sigma=pt.exp(log_sigma_gp),\n",
+    "            rho=pt.exp(log_rho_gp),\n",
     "            Q=1 / np.sqrt(2),\n",
     "        )\n",
-    "        gp = GaussianProcess(kernel, t=x[mask], yerr=tt.exp(log_sigma_lc))\n",
-    "        gp.marginal(\"gp\", observed=resid)\n",
-    "        #         pm.Deterministic(\"gp_pred\", gp.predict(resid))\n",
+    "        gp = GaussianProcess(\n",
+    "            kernel, t=x[mask], yerr=pt.exp(log_sigma_lc), mean=light_curve\n",
+    "        )\n",
+    "        gp.marginal(\"gp\", observed=y[mask])\n",
+    "        #         pm.Deterministic(\"gp_pred\", gp.predict(y[mask]))\n",
     "\n",
     "        # Compute and save the phased light curve models\n",
     "        pm.Deterministic(\n",
@@ -275,7 +285,7 @@
     "        # Fit for the maximum a posteriori parameters, I've found that I can get\n",
     "        # a better solution by trying different combinations of parameters in turn\n",
     "        if start is None:\n",
-    "            start = model.test_point\n",
+    "            start = model.initial_point()\n",
     "        map_soln = pmx.optimize(\n",
     "            start=start, vars=[log_sigma_lc, log_sigma_gp, log_rho_gp]\n",
     "        )\n",
@@ -285,7 +295,7 @@
     "        map_soln = pmx.optimize(start=map_soln, vars=[u_star])\n",
     "        map_soln = pmx.optimize(start=map_soln, vars=[log_depth])\n",
     "        map_soln = pmx.optimize(start=map_soln, vars=[b])\n",
-    "        map_soln = pmx.optimize(start=map_soln, vars=[ecs])\n",
+    "        map_soln = pmx.optimize(start=map_soln, vars=[secosw, sesinw])\n",
     "        map_soln = pmx.optimize(start=map_soln, vars=[mean])\n",
     "        map_soln = pmx.optimize(\n",
     "            start=map_soln, vars=[log_sigma_lc, log_sigma_gp, log_rho_gp]\n",
@@ -295,7 +305,9 @@
     "        extras = dict(\n",
     "            zip(\n",
     "                [\"light_curves\", \"gp_pred\"],\n",
-    "                pmx.eval_in_model([light_curves, gp.predict(resid)], map_soln),\n",
+    "                pmx.eval_in_model(\n",
+    "                    [light_curves, gp.predict(y[mask])], map_soln\n",
+    "                ),\n",
     "            )\n",
     "        )\n",
     "\n",
@@ -330,7 +342,8 @@
     "\n",
     "    ax = axes[0]\n",
     "    ax.plot(x[mask], y[mask], \"k\", label=\"data\")\n",
-    "    gp_mod = extras[\"gp_pred\"] + soln[\"mean\"]\n",
+    "    lc_mod = np.sum(extras[\"light_curves\"], axis=-1)\n",
+    "    gp_mod = extras[\"gp_pred\"] - lc_mod\n",
     "    ax.plot(x[mask], gp_mod, color=\"C2\", label=\"gp model\")\n",
     "    ax.legend(fontsize=10)\n",
     "    ax.set_ylabel(\"relative flux [ppt]\")\n",
@@ -344,7 +357,7 @@
     "    ax.set_ylabel(\"de-trended flux [ppt]\")\n",
     "\n",
     "    ax = axes[2]\n",
-    "    mod = gp_mod + np.sum(extras[\"light_curves\"], axis=-1)\n",
+    "    mod = gp_mod + lc_mod\n",
     "    ax.plot(x[mask], y[mask] - mod, \"k\")\n",
     "    ax.axhline(0, color=\"#aaaaaa\", lw=1)\n",
     "    ax.set_ylabel(\"residuals [ppt]\")\n",
@@ -372,11 +385,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "mod = (\n",
-    "    extras0[\"gp_pred\"]\n",
-    "    + map_soln0[\"mean\"]\n",
-    "    + np.sum(extras0[\"light_curves\"], axis=-1)\n",
-    ")\n",
+    "mod = extras0[\"gp_pred\"]\n",
     "resid = y - mod\n",
     "rms = np.sqrt(np.median(resid**2))\n",
     "mask = np.abs(resid) < 5 * rms\n",
@@ -431,7 +440,7 @@
     "    trace = pm.sample(\n",
     "        tune=1500,\n",
     "        draws=1000,\n",
-    "        start=map_soln,\n",
+    "        initvals=map_soln,\n",
     "        # Parallel sampling runs poorly or crashes on macos\n",
     "        cores=1 if platform.system() == \"Darwin\" else 2,\n",
     "        chains=2,\n",
@@ -489,8 +498,9 @@
     "flat_samps = trace.posterior.stack(sample=(\"chain\", \"draw\"))\n",
     "\n",
     "# Compute the GP prediction\n",
-    "gp_mod = extras[\"gp_pred\"] + map_soln[\"mean\"]  # np.median(\n",
-    "#     flat_samps[\"gp_pred\"].values + flat_samps[\"mean\"].values[None, :], axis=-1\n",
+    "lc_mod = np.sum(extras[\"light_curves\"], axis=-1)\n",
+    "gp_mod = extras[\"gp_pred\"] - lc_mod  # np.median(\n",
+    "#     flat_samps[\"gp_pred\"].values - flat_samps[\"lc_pred\"], axis=-1\n",
     "# )\n",
     "\n",
     "# Get the posterior median orbital parameters\n",
diff --git a/docs/tutorials/transit.ipynb b/docs/tutorials/transit.ipynb
index 9f48c582..9dad9c18 100644
--- a/docs/tutorials/transit.ipynb
+++ b/docs/tutorials/transit.ipynb
@@ -55,7 +55,7 @@
     "    .get_light_curve(orbit=orbit, r=0.1, t=t, texp=0.02)\n",
     "    .eval()\n",
     ")\n",
-    "# Note: the `eval` is needed because this is using Theano in\n",
+    "# Note: the `eval` is needed because this is using PyTensor in\n",
     "# the background\n",
     "\n",
     "plt.plot(t, light_curve, color=\"C0\", lw=2)\n",
@@ -69,11 +69,11 @@
    "id": "f9f84dd5",
    "metadata": {},
    "source": [
-    "But the real power comes from the fact that this is defined as a [Aesara/Theano operation](https://aesara.readthedocs.io/en/latest/extending/index.html) so it can be combined with PyMC3 to do gradient-based inference.\n",
+    "But the real power comes from the fact that this is defined as a [PyTensor/Theano operation](https://pytensor.readthedocs.io/en/latest/extending/index.html) so it can be combined with PyMC to do gradient-based inference.\n",
     "\n",
-    "## The transit model in PyMC3\n",
+    "## The transit model in PyMC\n",
     "\n",
-    "In this section, we will construct a simple transit fit model using *PyMC3* and then we will fit a two planet model to simulated data.\n",
+    "In this section, we will construct a simple transit fit model using *PyMC* and then we will fit a two planet model to simulated data.\n",
     "To start, let's randomly sample some periods and phases and then define the time sampling:"
    ]
   },
@@ -99,7 +99,7 @@
     "Then, define the parameters.\n",
     "In this simple model, we'll just fit for the limb darkening parameters of the star, and the period, phase, impact parameter, and radius ratio of the planets (note: this is already 10 parameters and running MCMC to convergence using [emcee](https://emcee.readthedocs.io) would probably take at least an hour).\n",
     "For the limb darkening, we'll use a quadratic law as parameterized by [Kipping (2013)](https://arxiv.org/abs/1308.0009).\n",
-    "This reparameterizations is implemented in *exoplanet* as custom *PyMC3* distribution :class:`exoplanet.distributions.QuadLimbDark`."
+    "This reparameterizations is implemented in *exoplanet* as custom *PyMC* distribution :class:`exoplanet.distributions.quad_limb_dark`."
    ]
   },
   {
@@ -109,29 +109,27 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import pymc3 as pm\n",
-    "import pymc3_ext as pmx\n",
+    "import pymc as pm\n",
+    "import pymc_ext as pmx\n",
     "\n",
     "with pm.Model() as model:\n",
     "    # The baseline flux\n",
-    "    mean = pm.Normal(\"mean\", mu=0.0, sd=1.0)\n",
+    "    mean = pm.Normal(\"mean\", mu=0.0, sigma=1.0)\n",
     "\n",
     "    # The time of a reference transit for each planet\n",
-    "    t0 = pm.Normal(\"t0\", mu=t0s, sd=1.0, shape=2)\n",
+    "    t0 = pm.Normal(\"t0\", mu=t0s, sigma=1.0, shape=2)\n",
     "\n",
     "    # The log period; also tracking the period itself\n",
-    "    logP = pm.Normal(\"logP\", mu=np.log(periods), sd=0.1, shape=2)\n",
+    "    logP = pm.Normal(\"logP\", mu=np.log(periods), sigma=0.1, shape=2)\n",
     "    period = pm.Deterministic(\"period\", pm.math.exp(logP))\n",
     "\n",
     "    # The Kipping (2013) parameterization for quadratic limb darkening paramters\n",
-    "    u = xo.distributions.QuadLimbDark(\"u\", testval=np.array([0.3, 0.2]))\n",
+    "    u = xo.quad_limb_dark(\"u\", initval=np.array([0.3, 0.2]))\n",
     "\n",
     "    r = pm.Uniform(\n",
-    "        \"r\", lower=0.01, upper=0.1, shape=2, testval=np.array([0.04, 0.06])\n",
-    "    )\n",
-    "    b = xo.distributions.ImpactParameter(\n",
-    "        \"b\", ror=r, shape=2, testval=np.random.rand(2)\n",
+    "        \"r\", lower=0.01, upper=0.1, shape=2, initval=np.array([0.04, 0.06])\n",
     "    )\n",
+    "    b = xo.impact_parameter(\"b\", ror=r, shape=2, initval=np.random.rand(2))\n",
     "\n",
     "    # Set up a Keplerian orbit for the planets\n",
     "    orbit = xo.orbits.KeplerianOrbit(period=period, t0=t0, b=b)\n",
@@ -159,11 +157,11 @@
     "    # ******************************************************************* #\n",
     "\n",
     "    # The likelihood function assuming known Gaussian uncertainty\n",
-    "    pm.Normal(\"obs\", mu=light_curve, sd=yerr, observed=y)\n",
+    "    pm.Normal(\"obs\", mu=light_curve, sigma=yerr, observed=y)\n",
     "\n",
     "    # Fit for the maximum a posteriori parameters given the simuated\n",
     "    # dataset\n",
-    "    map_soln = pmx.optimize(start=model.test_point)"
+    "    map_soln = pmx.optimize(start=model.initial_point())"
    ]
   },
   {
@@ -201,7 +199,7 @@
     "## Sampling\n",
     "\n",
     "Now, let's sample from the posterior defined by this model.\n",
-    "As usual, there are strong covariances between some of the parameters so we'll use `pmx.sample` from [pymc3-ext](https://github.com/exoplanet-dev/pymc3-ext)."
+    "As usual, there are strong covariances between some of the parameters so we'll use `pmx.utils.sample` from [pymc-ext](https://github.com/exoplanet-dev/pymc-ext)."
    ]
   },
   {
@@ -213,10 +211,10 @@
    "source": [
     "np.random.seed(42)\n",
     "with model:\n",
-    "    trace = pmx.sample(\n",
+    "    trace = pmx.utils.sample(\n",
     "        tune=1000,\n",
     "        draws=1000,\n",
-    "        start=map_soln,\n",
+    "        initvals=map_soln,\n",
     "        cores=2,\n",
     "        chains=2,\n",
     "        target_accept=0.9,\n",
@@ -268,7 +266,7 @@
     "truth = dict(\n",
     "    zip(\n",
     "        [\"period\", \"r\"],\n",
-    "        pmx.eval_in_model([period, r], model.test_point, model=model),\n",
+    "        pmx.eval_in_model([period, r], model.initial_point(), model=model),\n",
     "    )\n",
     ")\n",
     "_ = corner.corner(\n",
diff --git a/docs/tutorials/ttv.ipynb b/docs/tutorials/ttv.ipynb
index 25eeafa4..01d2c77e 100644
--- a/docs/tutorials/ttv.ipynb
+++ b/docs/tutorials/ttv.ipynb
@@ -102,7 +102,7 @@
    "id": "86f7e561",
    "metadata": {},
    "source": [
-    "Now, like in the [Transit fitting](https://docs.exoplanet.codes/en/stable/tutorials/transit/) tutorial, we'll set up the the model using *PyMC3* and *exoplanet*, and then simulate a data set from that model."
+    "Now, like in the [Transit fitting](https://docs.exoplanet.codes/en/stable/tutorials/transit/) tutorial, we'll set up the the model using *PyMC* and *exoplanet*, and then simulate a data set from that model."
    ]
   },
   {
@@ -112,25 +112,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import pymc3 as pm\n",
-    "import pymc3_ext as pmx\n",
-    "import aesara_theano_fallback.tensor as tt\n",
+    "import pymc as pm\n",
+    "import pymc_ext as pmx\n",
+    "import pytensor.tensor as pt\n",
     "\n",
     "np.random.seed(9485023)\n",
     "\n",
     "with pm.Model() as model:\n",
     "    # This part of the model is similar to the model in the `transit` tutorial\n",
-    "    mean = pm.Normal(\"mean\", mu=0.0, sd=1.0)\n",
-    "    u = xo.QuadLimbDark(\"u\", testval=np.array([0.3, 0.2]))\n",
+    "    mean = pm.Normal(\"mean\", mu=0.0, sigma=1.0)\n",
+    "    u = xo.quad_limb_dark(\"u\", initval=np.array([0.3, 0.2]))\n",
     "    logr = pm.Uniform(\n",
     "        \"logr\",\n",
     "        lower=np.log(0.01),\n",
     "        upper=np.log(0.1),\n",
     "        shape=2,\n",
-    "        testval=np.log([0.04, 0.06]),\n",
+    "        initval=np.log([0.04, 0.06]),\n",
     "    )\n",
-    "    r = pm.Deterministic(\"r\", tt.exp(logr))\n",
-    "    b = xo.ImpactParameter(\n",
+    "    r = pm.Deterministic(\"r\", pt.exp(logr))\n",
+    "    b = xo.impact_parameter(\n",
     "        \"b\", ror=r, shape=2, testval=0.5 * np.random.rand(2)\n",
     "    )\n",
     "\n",
@@ -141,7 +141,7 @@
     "            pm.Normal(\n",
     "                \"tts_{0}\".format(i),\n",
     "                mu=true_transit_times[i],\n",
-    "                sd=1.0,\n",
+    "                sigma=1.0,\n",
     "                shape=len(true_transit_times[i]),\n",
     "            )\n",
     "        )\n",
@@ -174,9 +174,9 @@
     "    # End of fake data creation; you want to include the following lines  #\n",
     "    # ******************************************************************* #\n",
     "\n",
-    "    pm.Normal(\"obs\", mu=light_curve, sd=yerr, observed=y)\n",
+    "    pm.Normal(\"obs\", mu=light_curve, sigma=yerr, observed=y)\n",
     "\n",
-    "    map_soln = model.test_point\n",
+    "    map_soln = model.initial_point()\n",
     "    map_soln = pmx.optimize(start=map_soln, vars=transit_times)\n",
     "    map_soln = pmx.optimize(start=map_soln, vars=[r, b])\n",
     "    map_soln = pmx.optimize(start=map_soln, vars=transit_times)\n",
@@ -310,10 +310,10 @@
    "source": [
     "np.random.seed(230948)\n",
     "with model:\n",
-    "    trace = pmx.sample(\n",
+    "    trace = pmx.utils.sample(\n",
     "        tune=1000,\n",
     "        draws=1000,\n",
-    "        start=map_soln,\n",
+    "        initvals=map_soln,\n",
     "        cores=2,\n",
     "        chains=2,\n",
     "        return_inferencedata=True,\n",