diff --git a/.github/workflows/Bayesian.yml b/.github/workflows/Bayesian.yml new file mode 100644 index 000000000..ceb3a751c --- /dev/null +++ b/.github/workflows/Bayesian.yml @@ -0,0 +1,47 @@ +name: "Bayesian Tests" + +on: + push: + branches: [master] + paths: + - 'lib/BayesianNeuralPDE/**' + pull_request: + paths: + - 'lib/BayesianNeuralPDE/**' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref_name != github.event.repository.default_branch || github.ref != 'refs/tags/v*' }} + +jobs: + tests: + name: Julia ${{ matrix.version }} - ${{ matrix.group }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + version: + - '1' + - 'lts' + - 'pre' + group: + - 'ODEBPINN' + - 'PDEBPINN' + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Set up Julia + uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + + - name: Run tests + run: | + julia --project -e 'using Pkg; Pkg.instantiate()' + julia --project -e 'using Pkg; Pkg.build()' + julia --project -e 'using Pkg; Pkg.test(coverage=true)' + env: + JULIA_GROUP: ${{ matrix.group }} + JULIA_COVERAGE_DIRECTORIES: "src,ext" + JULIA_RUNTEST_DEPWARN: "yes" \ No newline at end of file diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml index 1a405daf4..c6ebdbe20 100644 --- a/.github/workflows/Tests.yml +++ b/.github/workflows/Tests.yml @@ -29,10 +29,6 @@ jobs: - "pre" group: - "QA" - - "ODEBPINN" - - "PDEBPINN" - - "NNPDE1" - - "NNPDE2" - "AdaptiveLoss" - "Forward" - "DGM" @@ -46,4 +42,4 @@ jobs: julia-version: "${{ matrix.version }}" coverage-directories: "src,ext" julia-runtest-depwarn: "yes" # TensorBoardLogger has a global depwarn - secrets: "inherit" + secrets: "inherit" \ No newline at end of file diff --git a/Project.toml b/Project.toml index 707742e04..67112b1d2 100644 --- a/Project.toml +++ b/Project.toml @@ -22,6 +22,7 @@ IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" Lux = "b2108857-7c20-44ae-9111-449ecde12c47" +OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40" diff --git a/lib/BayesianNeuralPDE/Project.toml b/lib/BayesianNeuralPDE/Project.toml new file mode 100644 index 000000000..c6cc2b5c1 --- /dev/null +++ b/lib/BayesianNeuralPDE/Project.toml @@ -0,0 +1,30 @@ +name = "BayesianNeuralPDE" +uuid = "3cea9122-e921-42ea-a9d7-c72fcb58ce53" +authors = ["paramthakkar123 "] +version = "0.1.0" + +[deps] +AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d" +ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" +ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" +Lux = "b2108857-7c20-44ae-9111-449ecde12c47" +MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" +MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca" +OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1" +OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +LogDensityProblems = 
"6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" +NeuralPDE = "315f7962-48a3-4962-8226-d0f33b1235f0" + +[compat] +ChainRulesCore = "1.25.1" +ConcreteStructs = "0.2.3" +MonteCarloMeasurements = "1.4.3" +Printf = "1.11.0" +SciMLBase = "2.72.1" diff --git a/src/BPINN_ode.jl b/lib/BayesianNeuralPDE/src/BPINN_ode.jl similarity index 85% rename from src/BPINN_ode.jl rename to lib/BayesianNeuralPDE/src/BPINN_ode.jl index 489cd5f6d..0b3589742 100644 --- a/src/BPINN_ode.jl +++ b/lib/BayesianNeuralPDE/src/BPINN_ode.jl @@ -1,14 +1,14 @@ # HIGH level API for BPINN ODE solver """ - BNNODE(chain, kernel = HMC; strategy = nothing, draw_samples = 2000, - priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05], - phystd = [0.05], phynewstd = [0.05], dataset = [nothing], physdt = 1 / 20.0, - MCMCargs = (; n_leapfrog=30), nchains = 1, init_params = nothing, - Adaptorkwargs = (; Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, - Metric = DiagEuclideanMetric), - Integratorkwargs = (Integrator = Leapfrog,), autodiff = false, - progress = false, verbose = false) + BNNODE(chain, kernel = HMC; strategy = nothing, draw_samples = 2000, + priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05], + phystd = [0.05], phynewstd = [0.05], dataset = [nothing], physdt = 1 / 20.0, + MCMCargs = (; n_leapfrog=30), nchains = 1, init_params = nothing, + Adaptorkwargs = (; Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, + Metric = DiagEuclideanMetric), + Integratorkwargs = (Integrator = Leapfrog,), autodiff = false, + progress = false, verbose = false) Algorithm for solving ordinary differential equations using a Bayesian neural network. This is a specialization of the physics-informed neural network which is used as a solver for a @@ -16,9 +16,9 @@ standard `ODEProblem`. !!! warn - Note that BNNODE only supports ODEs which are written in the out-of-place form, i.e. - `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the BNNODE - will exit with an error. + Note that BNNODE only supports ODEs which are written in the out-of-place form, i.e. + `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the BNNODE + will exit with an error. ## Positional Arguments @@ -48,14 +48,14 @@ dataset = [x̂, time] chainlux = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1)) alg = BNNODE(chainlux; draw_samples = 2000, l2std = [0.05], phystd = [0.05], - priorsNNw = (0.0, 3.0), progress = true) + priorsNNw = (0.0, 3.0), progress = true) sol_lux = solve(prob, alg) # with parameter estimation alg = BNNODE(chainlux; dataset, draw_samples = 2000, l2std = [0.05], phystd = [0.05], - priorsNNw = (0.0, 10.0), param = [Normal(6.5, 0.5), Normal(-3, 0.5)], - progress = true) + priorsNNw = (0.0, 10.0), param = [Normal(6.5, 0.5), Normal(-3, 0.5)], + progress = true) sol_lux_pestim = solve(prob, alg) ``` @@ -79,7 +79,7 @@ Kevin Linka, Amelie Schäfer, Xuhui Meng, Zongren Zou, George Em Karniadakis, El """ @concrete struct BNNODE <: NeuralPDEAlgorithm chain <: AbstractLuxLayer - kernel + kernel::Any strategy <: Union{Nothing, AbstractTrainingStrategy} draw_samples::Int priorsNNw::Tuple{Float64, Float64} @@ -122,19 +122,19 @@ Contains `ahmc_bayesian_pinn_ode()` function output: 1. A MCMCChains.jl chain object for sampled parameters. 2. The set of all sampled parameters. 3. 
Statistics like: - - n_steps - - acceptance_rate - - log_density - - hamiltonian_energy - - hamiltonian_energy_error - - numerical_error - - step_size - - nom_step_size + - n_steps + - acceptance_rate + - log_density + - hamiltonian_energy + - hamiltonian_energy_error + - numerical_error + - step_size + - nom_step_size """ @concrete struct BPINNstats - mcmc_chain - samples - statistics + mcmc_chain::Any + samples::Any + statistics::Any end """ @@ -149,10 +149,10 @@ contains fields related to that). """ @concrete struct BPINNsolution original <: BPINNstats - ensemblesol - estimated_nn_params - estimated_de_params - timepoints + ensemblesol::Any + estimated_nn_params::Any + estimated_de_params::Any + timepoints::Any end function SciMLBase.__solve(prob::SciMLBase.ODEProblem, alg::BNNODE, args...; dt = nothing, diff --git a/lib/BayesianNeuralPDE/src/BayesianNeuralPDE.jl b/lib/BayesianNeuralPDE/src/BayesianNeuralPDE.jl new file mode 100644 index 000000000..3944a8870 --- /dev/null +++ b/lib/BayesianNeuralPDE/src/BayesianNeuralPDE.jl @@ -0,0 +1,26 @@ +module BayesianNeuralPDE + +using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux, + AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements +using Printf: @printf +using ConcreteStructs: @concrete +using NeuralPDE: PhysicsInformedNN +using SciMLBase: SciMLBase +using ChainRulesCore: ChainRulesCore, @non_differentiable, @ignore_derivatives +using LogDensityProblems: LogDensityProblems + +abstract type AbstractPINN end + +abstract type AbstractTrainingStrategy end +abstract type NeuralPDEAlgorithm <: SciMLBase.AbstractODEAlgorithm end + +include("advancedHMC_MCMC.jl") +include("pinn_types.jl") +include("BPINN_ode.jl") +include("discretize.jl") +include("PDE_BPINN.jl") + +export BNNODE, ahmc_bayesian_pinn_ode, ahmc_bayesian_pinn_pde +export BPINNsolution, BayesianPINN + +end \ No newline at end of file diff --git a/src/PDE_BPINN.jl b/lib/BayesianNeuralPDE/src/PDE_BPINN.jl similarity index 97% rename from src/PDE_BPINN.jl rename to lib/BayesianNeuralPDE/src/PDE_BPINN.jl index a9b272c39..2b55ac305 100644 --- a/src/PDE_BPINN.jl +++ b/lib/BayesianNeuralPDE/src/PDE_BPINN.jl @@ -8,9 +8,9 @@ names::Tuple extraparams::Int init_params <: Union{AbstractVector, NamedTuple, ComponentArray} - full_loglikelihood - L2_loss2 - Φ + full_loglikelihood::Any + L2_loss2::Any + Φ::Any end function LogDensityProblems.logdensity(ltd::PDELogTargetDensity, θ) @@ -252,13 +252,13 @@ function inference(samples, pinnrep, saveats, numensemble, ℓπ) end """ - ahmc_bayesian_pinn_pde(pde_system, discretization; - draw_samples = 1000, bcstd = [0.01], l2std = [0.05], phystd = [0.05], - phynewstd = [0.05], priorsNNw = (0.0, 2.0), param = [], nchains = 1, - Kernel = HMC(0.1, 30), Adaptorkwargs = (Adaptor = StanHMCAdaptor, - Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), - Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0], - numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false) + ahmc_bayesian_pinn_pde(pde_system, discretization; + draw_samples = 1000, bcstd = [0.01], l2std = [0.05], phystd = [0.05], + phynewstd = [0.05], priorsNNw = (0.0, 2.0), param = [], nchains = 1, + Kernel = HMC(0.1, 30), Adaptorkwargs = (Adaptor = StanHMCAdaptor, + Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), + Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0], + numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false) ## NOTES @@ -305,8 +305,8 @@ end !!! 
warning - AdvancedHMC.jl is still developing convenience structs so might need changes on new - releases. + AdvancedHMC.jl is still developing convenience structs so might need changes on new + releases. """ function ahmc_bayesian_pinn_pde(pde_system, discretization; draw_samples = 1000, bcstd = [0.01], l2std = [0.05], phystd = [0.05], diff --git a/lib/BayesianNeuralPDE/src/advancedHMC_MCMC.jl b/lib/BayesianNeuralPDE/src/advancedHMC_MCMC.jl new file mode 100644 index 000000000..a577e89b6 --- /dev/null +++ b/lib/BayesianNeuralPDE/src/advancedHMC_MCMC.jl @@ -0,0 +1,240 @@ +""" + ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining, dataset = [nothing], + init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0f0, + l2std = [0.05], phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0), + param = [], nchains = 1, autodiff = false, Kernel = HMC, + Adaptorkwargs = (Adaptor = StanHMCAdaptor, + Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), + Integratorkwargs = (Integrator = Leapfrog,), + MCMCkwargs = (n_leapfrog = 30,), progress = false, + verbose = false) + +!!! warn + + Note that `ahmc_bayesian_pinn_ode()` only supports ODEs which are written in the + out-of-place form, i.e. `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared + out-of-place, then `ahmc_bayesian_pinn_ode()` will exit with an error. + +## Example + +```julia +linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t) +tspan = (0.0, 10.0) +u0 = 0.0 +p = [5.0, -5.0] +prob = ODEProblem(linear, u0, tspan, p) + +### CREATE DATASET (Necessity for accurate Parameter estimation) +sol = solve(prob, Tsit5(); saveat = 0.05) +u = sol.u[1:100] +time = sol.t[1:100] + +### dataset and BPINN create +x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u))) +dataset = [x̂, time] + +chain1 = Lux.Chain(Lux.Dense(1, 5, tanh), Lux.Dense(5, 5, tanh), Lux.Dense(5, 1) + +### simply solving ode here hence better to not pass dataset(uses ode params specified in prob) +fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chain1, + dataset = dataset, + draw_samples = 1500, + l2std = [0.05], + phystd = [0.05], + priorsNNw = (0.0,3.0)) + +### solving ode + estimating parameters hence dataset needed to optimize parameters upon + Pior Distributions for ODE params +fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chain1, + dataset = dataset, + draw_samples = 1500, + l2std = [0.05], + phystd = [0.05], + priorsNNw = (0.0,3.0), + param = [Normal(6.5,0.5), Normal(-3,0.5)]) +``` + +## NOTES + +Dataset is required for accurate Parameter estimation + solving equations +Incase you are only solving the Equations for solution, do not provide dataset + +## Positional Arguments + +* `prob`: DEProblem(out of place and the function signature should be f(u,p,t). +* `chain`: Lux Neural Netork which would be made the Bayesian PINN. + +## Keyword Arguments + +* `strategy`: The training strategy used to choose the points for the evaluations. By + default GridTraining is used with given physdt discretization. 
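For reference, a runnable counterpart to the forward-solve example in the docstring above (whose `Lux.Chain` call is missing its closing parenthesis). This is a minimal sketch, assuming the `BayesianNeuralPDE` module and exports introduced elsewhere in this diff:

```julia
using BayesianNeuralPDE, OrdinaryDiffEq, Lux

# out-of-place ODE with two parameters, as in the docstring example
linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t)
prob = ODEProblem(linear, 0.0, (0.0, 10.0), [5.0, -5.0])

# closing parenthesis added relative to the docstring example
chain1 = Lux.Chain(Lux.Dense(1, 5, tanh), Lux.Dense(5, 5, tanh), Lux.Dense(5, 1))

# forward solve only, so no dataset or ODE-parameter priors are passed
fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(
    prob, chain1; draw_samples = 1500, l2std = [0.05], phystd = [0.05],
    priorsNNw = (0.0, 3.0))
```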
+* `init_params`: initial parameter values for BPINN (ideally for multiple chains different + initializations preferred) +* `nchains`: number of chains you want to sample +* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are + ~2/3 of draw samples) +* `l2std`: standard deviation of BPINN prediction against L2 losses/Dataset +* `phystd`: standard deviation of BPINN prediction against Chosen Underlying ODE System +* `phynewstd`: standard deviation of new loss func term +* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of + BPINN are Normal Distributions by default. +* `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems. +* `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical) +* `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default) +* `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implementations HMC/NUTS/HMCDA) +* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. + Refer: https://turinglang.org/AdvancedHMC.jl/stable/ +* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. + Refer: https://turinglang.org/AdvancedHMC.jl/stable/ Note: Target percentage (in decimal) + of iterations in which the proposals are accepted (0.8 by default) +* `MCMCargs`: A NamedTuple containing all the chosen MCMC kernel's (HMC/NUTS/HMCDA) + Arguments, as follows : + * `n_leapfrog`: number of leapfrog steps for HMC + * `δ`: target acceptance probability for NUTS and HMCDA + * `λ`: target trajectory length for HMCDA + * `max_depth`: Maximum doubling tree depth (NUTS) + * `Δ_max`: Maximum divergence during doubling tree (NUTS) + Refer: https://turinglang.org/AdvancedHMC.jl/stable/ +* `progress`: controls whether to show the progress meter or not. +* `verbose`: controls the verbosity. (Sample call args in AHMC) + +!!! warning + + AdvancedHMC.jl is still developing convenience structs so might need changes on new + releases. +""" +function ahmc_bayesian_pinn_ode( + prob::SciMLBase.ODEProblem, chain; strategy = GridTraining, dataset = [nothing], + init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0, l2std = [0.05], + phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0), param = [], nchains = 1, + autodiff = false, Kernel = HMC, + Adaptorkwargs = (Adaptor = StanHMCAdaptor, + Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), + Integratorkwargs = (Integrator = Leapfrog,), MCMCkwargs = (n_leapfrog = 30,), + progress = false, verbose = false, estim_collocate = false) + @assert !isinplace(prob) "The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)." + + chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain)) + + strategy = strategy == GridTraining ? strategy(physdt) : strategy + + if dataset != [nothing] && + (length(dataset) < 2 || !(dataset isa Vector{<:Vector{<:AbstractFloat}})) + error("Invalid dataset. 
dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}") + end + + if dataset != [nothing] && param == [] + println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.") + elseif dataset == [nothing] && param != [] + error("Dataset Required for Parameter Estimation.") + end + + initial_nnθ, chain, st = generate_ltd(chain, init_params) + + @assert nchains≤Threads.nthreads() "number of chains is greater than available threads" + @assert nchains≥1 "number of chains must be greater than 1" + + # eltype(physdt) cause needs Float64 for find_good_stepsize + # Lux chain(using component array later as vector_to_parameter need namedtuple) + T = eltype(physdt) + initial_θ = getdata(ComponentArray{T}(initial_nnθ)) + + # adding ode parameter estimation + nparameters = length(initial_θ) + ninv = length(param) + priors = [ + MvNormal(T(priorsNNw[1]) * ones(T, nparameters), + Diagonal(abs2.(T(priorsNNw[2]) .* ones(T, nparameters)))) + ] + + # append Ode params to all paramvector + if ninv > 0 + # shift ode params(initialise ode params by prior means) + initial_θ = vcat(initial_θ, [Distributions.params(param[i])[1] for i in 1:ninv]) + priors = vcat(priors, param) + nparameters += ninv + end + + smodel = StatefulLuxLayer{true}(chain, nothing, st) + # dimensions would be total no of params,initial_nnθ for Lux namedTuples + ℓπ = LogTargetDensity(nparameters, prob, smodel, strategy, dataset, priors, + phystd, phynewstd, l2std, autodiff, physdt, ninv, initial_nnθ, estim_collocate) + + if verbose + @printf("Current Physics Log-likelihood: %g\n", physloglikelihood(ℓπ, initial_θ)) + @printf("Current Prior Log-likelihood: %g\n", priorweights(ℓπ, initial_θ)) + @printf("Current SSE against dataset Log-likelihood: %g\n", + L2LossData(ℓπ, initial_θ)) + if estim_collocate + @printf("Current gradient loss against dataset Log-likelihood: %g\n", + L2loss2(ℓπ, initial_θ)) + end + end + + Adaptor = Adaptorkwargs[:Adaptor] + Metric = Adaptorkwargs[:Metric] + targetacceptancerate = Adaptorkwargs[:targetacceptancerate] + + # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space) + metric = Metric(nparameters) + hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff) + + # parallel sampling option + if nchains != 1 + # Cache to store the chains + chains = Vector{Any}(undef, nchains) + statsc = Vector{Any}(undef, nchains) + samplesc = Vector{Any}(undef, nchains) + + Threads.@threads for i in 1:nchains + # each chain has different initial NNparameter values(better posterior exploration) + initial_θ = vcat( + randn(eltype(initial_θ), nparameters - ninv), + initial_θ[(nparameters - ninv + 1):end] + ) + initial_ϵ = find_good_stepsize(hamiltonian, initial_θ) + integrator = integratorchoice(Integratorkwargs, initial_ϵ) + adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric), + StepSizeAdaptor(targetacceptancerate, integrator)) + + MCMC_alg = kernelchoice(Kernel, MCMCkwargs) + Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator) + samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor; + progress = progress, verbose = verbose) + + samplesc[i] = samples + statsc[i] = stats + mcmc_chain = Chains(reduce(hcat, samples)') + chains[i] = mcmc_chain + end + + return chains, samplesc, statsc + else + initial_ϵ = find_good_stepsize(hamiltonian, initial_θ) + integrator = integratorchoice(Integratorkwargs, initial_ϵ) + adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric), + StepSizeAdaptor(targetacceptancerate, integrator)) 
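+        # single-chain path: pick the MCMC transition kernel (HMC/NUTS/HMCDA) from
+        # `Kernel` and `MCMCkwargs`, then draw `draw_samples` samples using the
+        # integrator and step-size/mass-matrix adaptor configured above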
+ + MCMC_alg = kernelchoice(Kernel, MCMCkwargs) + Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator) + samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, + adaptor; progress = progress, verbose = verbose) + + if verbose + println("Sampling Complete.") + @printf("Final Physics Log-likelihood: %g\n", + physloglikelihood(ℓπ, samples[end])) + @printf("Final Prior Log-likelihood: %g\n", priorweights(ℓπ, samples[end])) + @printf("Final SSE against dataset Log-likelihood: %g\n", + L2LossData(ℓπ, samples[end])) + if estim_collocate + @printf("Final gradient loss against dataset Log-likelihood: %g\n", + L2loss2(ℓπ, samples[end])) + end + end + + # return a chain(basic chain),samples and stats + matrix_samples = reshape(hcat(samples...), (length(samples[1]), length(samples), 1)) + mcmc_chain = MCMCChains.Chains(matrix_samples) + return mcmc_chain, samples, stats + end +end diff --git a/lib/BayesianNeuralPDE/src/discretize.jl b/lib/BayesianNeuralPDE/src/discretize.jl new file mode 100644 index 000000000..8b655a6e9 --- /dev/null +++ b/lib/BayesianNeuralPDE/src/discretize.jl @@ -0,0 +1,78 @@ +function get_likelihood_estimate_function(discretization::BayesianPINN) + dataset_pde, dataset_bc = discretization.dataset + + pde_loss_functions, bc_loss_functions = merge_strategy_with_loglikelihood_function( + pinnrep, strategy, + datafree_pde_loss_functions, datafree_bc_loss_functions) + + # required as Physics loss also needed on the discrete dataset domain points + # data points are discrete and so by default GridTraining loss applies + # passing placeholder dx with GridTraining, it uses data points irl + datapde_loss_functions, databc_loss_functions = if dataset_bc !== nothing || + dataset_pde !== nothing + merge_strategy_with_loglikelihood_function(pinnrep, GridTraining(0.1), + datafree_pde_loss_functions, datafree_bc_loss_functions, + train_sets_pde = dataset_pde, train_sets_bc = dataset_bc) + else + nothing, nothing + end + + # this includes losses from dataset domain points as well as discretization points + function full_loss_function(θ, allstd::Vector{Vector{Float64}}) + stdpdes, stdbcs, stdextra = allstd + # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them + # SSE FOR LOSS ON GRIDPOINTS not MSE ! 
i, j depend on number of bcs and eqs + pde_loglikelihoods = sum([pde_loglike_function(θ, stdpdes[i]) + for (i, pde_loglike_function) in enumerate(pde_loss_functions)]) + + bc_loglikelihoods = sum([bc_loglike_function(θ, stdbcs[j]) + for (j, bc_loglike_function) in enumerate(bc_loss_functions)]) + + # final newloss creation components are similar to this + if !(datapde_loss_functions isa Nothing) + pde_loglikelihoods += sum([pde_loglike_function(θ, stdpdes[j]) + for (j, pde_loglike_function) in enumerate(datapde_loss_functions)]) + end + + if !(databc_loss_functions isa Nothing) + bc_loglikelihoods += sum([bc_loglike_function(θ, stdbcs[j]) + for (j, bc_loglike_function) in enumerate(databc_loss_functions)]) + end + + # this is kind of a hack, and means that whenever the outer function is evaluated the increment goes up, even if it's not being optimized + # that's why we prefer the user to maintain the increment in the outer loop callback during optimization + @ignore_derivatives if self_increment + iteration[] += 1 + end + + @ignore_derivatives begin + reweight_losses_func(θ, pde_loglikelihoods, + bc_loglikelihoods) + end + + weighted_pde_loglikelihood = adaloss.pde_loss_weights .* pde_loglikelihoods + weighted_bc_loglikelihood = adaloss.bc_loss_weights .* bc_loglikelihoods + + sum_weighted_pde_loglikelihood = sum(weighted_pde_loglikelihood) + sum_weighted_bc_loglikelihood = sum(weighted_bc_loglikelihood) + weighted_loglikelihood_before_additional = sum_weighted_pde_loglikelihood + + sum_weighted_bc_loglikelihood + + full_weighted_loglikelihood = if additional_loss isa Nothing + weighted_loglikelihood_before_additional + else + (θ_, p_) = param_estim ? (θ.depvar, θ.p) : (θ, nothing) + _additional_loss = additional_loss(phi, θ_, p_) + _additional_loglikelihood = logpdf(Normal(0, stdextra), _additional_loss) + + weighted_additional_loglikelihood = adaloss.additional_loss_weights[1] * + _additional_loglikelihood + + weighted_loglikelihood_before_additional + weighted_additional_loglikelihood + end + + return full_weighted_loglikelihood + end + + return full_loss_function +end diff --git a/lib/BayesianNeuralPDE/src/pinn_types.jl b/lib/BayesianNeuralPDE/src/pinn_types.jl new file mode 100644 index 000000000..bc0fb5fc5 --- /dev/null +++ b/lib/BayesianNeuralPDE/src/pinn_types.jl @@ -0,0 +1,33 @@ +""" + BayesianPINN(args...; dataset = nothing, kwargs...) + +A `discretize` algorithm for the ModelingToolkit PDESystem interface, which transforms a +`PDESystem` into a likelihood function used for HMC based Posterior Sampling Algorithms +[AdvancedHMC.jl](https://turinglang.org/AdvancedHMC.jl/stable/) which is later optimized +upon to give the Solution Distribution of the PDE, using the Physics-Informed Neural +Networks (PINN) methodology. + +All positional arguments and keyword arguments are passed to `PhysicsInformedNN` except +the ones mentioned below. + +## Keyword Arguments + +* `dataset`: A vector of matrix, each matrix for ith dependant variable and first col in + matrix is for dependant variables, remaining columns for independent variables. Needed for + inverse problem solving. +""" +@concrete struct BayesianPINN <: AbstractPINN + pinn <: PhysicsInformedNN + dataset::Any +end + +function Base.getproperty(pinn::BayesianPINN, name::Symbol) + name === :dataset && return getfield(pinn, :dataset) + name === :pinn && return getfield(pinn, :pinn) + return getproperty(pinn.pinn, name) +end + +function BayesianPINN(args...; dataset = nothing, kwargs...) 
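+    # when no dataset is supplied (forward problem), store a (pde_data, bc_data)
+    # placeholder of (nothing, nothing); all other arguments are forwarded to the
+    # wrapped PhysicsInformedNN unchanged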
+ dataset === nothing && (dataset = (nothing, nothing)) + return BayesianPINN(PhysicsInformedNN(args...; kwargs...), dataset) +end diff --git a/test/BPINN_PDE_tests.jl b/lib/BayesianNeuralPDE/test/BPINN_PDE_tests.jl similarity index 99% rename from test/BPINN_PDE_tests.jl rename to lib/BayesianNeuralPDE/test/BPINN_PDE_tests.jl index 4f7caea7e..d8456e051 100644 --- a/test/BPINN_PDE_tests.jl +++ b/lib/BayesianNeuralPDE/test/BPINN_PDE_tests.jl @@ -187,7 +187,7 @@ end AdvancedHMC, Statistics, Random, Functors, NeuralPDE, MonteCarloMeasurements, ComponentArrays import ModelingToolkit: Interval, infimum, supremum - import Flux + using Flux: Flux Random.seed!(100) diff --git a/test/BPINN_tests.jl b/lib/BayesianNeuralPDE/test/BPINN_tests.jl similarity index 99% rename from test/BPINN_tests.jl rename to lib/BayesianNeuralPDE/test/BPINN_tests.jl index 37dc4e54d..ef485f4a7 100644 --- a/test/BPINN_tests.jl +++ b/lib/BayesianNeuralPDE/test/BPINN_tests.jl @@ -1,7 +1,7 @@ @testitem "BPINN ODE I: Without Param Estimation" tags=[:odebpinn] begin using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux, AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements - import Flux + using Flux: Flux Random.seed!(100) @@ -55,7 +55,7 @@ end @testitem "BPINN ODE II: With Parameter Estimation" tags=[:odebpinn] begin using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux, AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements - import Flux + using Flux: Flux Random.seed!(100) @@ -123,7 +123,7 @@ end @testitem "BPINN ODE III" tags=[:odebpinn] begin using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux, AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements - import Flux + using Flux: Flux Random.seed!(100) @@ -190,7 +190,7 @@ end @testitem "BPINN ODE: Translating from Flux" tags=[:odebpinn] begin using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux, AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements - import Flux + using Flux: Flux Random.seed!(100) @@ -224,7 +224,7 @@ end @testitem "BPINN ODE III: with the new objective" tags=[:odebpinn] begin using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux, AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements - import Flux + using Flux: Flux Random.seed!(100) @@ -304,7 +304,7 @@ end @testitem "BPINN ODE III: new objective solve call" tags=[:odebpinn] begin using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux, AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements - import Flux + using Flux: Flux Random.seed!(100) @@ -355,7 +355,7 @@ end @testitem "BPINN ODE IV: Improvement" tags=[:odebpinn] begin using MCMCChains, Distributions, OrdinaryDiffEq, OptimizationOptimisers, Lux, AdvancedHMC, Statistics, Random, Functors, ComponentArrays, MonteCarloMeasurements - import Flux + using Flux: Flux Random.seed!(100) diff --git a/lib/BayesianNeuralPDE/test/runtests.jl b/lib/BayesianNeuralPDE/test/runtests.jl new file mode 100644 index 000000000..4fd53b60a --- /dev/null +++ b/lib/BayesianNeuralPDE/test/runtests.jl @@ -0,0 +1,20 @@ +using ReTestItems, InteractiveUtils, Hwloc + +@info sprint(versioninfo) + +const GROUP = lowercase(get(ENV, "GROUP", "all")) + +# const RETESTITEMS_NWORKERS = parse( +# Int, get(ENV, "RETESTITEMS_NWORKERS", string(min(Hwloc.num_physical_cores(), 4)))) +# const 
RETESTITEMS_NWORKER_THREADS = parse(Int, +# get(ENV, "RETESTITEMS_NWORKER_THREADS", +# string(max(Hwloc.num_virtual_cores() ÷ RETESTITEMS_NWORKERS, 1)))) + +using BayesianNeuralPDE + +@info "Running tests with $(RETESTITEMS_NWORKERS) workers and \ + $(RETESTITEMS_NWORKER_THREADS) threads for group $(GROUP)" + +ReTestItems.runtests(BayesianNeuralPDE; tags = (GROUP == "all" ? nothing : [Symbol(GROUP)]), + nworkers = RETESTITEMS_NWORKERS, + nworker_threads = RETESTITEMS_NWORKER_THREADS, testitem_timeout = 3600) diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl index 55227371e..c79580ed1 100644 --- a/src/NeuralPDE.jl +++ b/src/NeuralPDE.jl @@ -43,13 +43,11 @@ using SymbolicUtils: SymbolicUtils using SymbolicIndexingInterface: SymbolicIndexingInterface # Needed for the Bayesian Stuff -using AdvancedHMC: AdvancedHMC, DiagEuclideanMetric, HMC, HMCDA, Hamiltonian, - JitteredLeapfrog, Leapfrog, MassMatrixAdaptor, NUTS, StanHMCAdaptor, - StepSizeAdaptor, TemperedLeapfrog, find_good_stepsize -using Distributions: Distributions, Distribution, MvNormal, Normal, dim, logpdf +using AdvancedHMC: AdvancedHMC, HMCDA, + NUTS +using Distributions: Distributions, Distribution, MvNormal, dim, logpdf using LogDensityProblems: LogDensityProblems -using MCMCChains: MCMCChains, Chains, sample -using MonteCarloMeasurements: Particles +using MCMCChains: MCMCChains import LuxCore: initialparameters, initialstates, parameterlength @@ -86,16 +84,12 @@ include("discretize.jl") include("neural_adapter.jl") include("advancedHMC_MCMC.jl") -include("BPINN_ode.jl") -include("PDE_BPINN.jl") include("dgm.jl") export PINOODE export NNODE, NNDAE -export BNNODE, ahmc_bayesian_pinn_ode, ahmc_bayesian_pinn_pde export PhysicsInformedNN, discretize -export BPINNsolution, BayesianPINN export DeepGalerkin export neural_adapter diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl index f7f18e09b..726614255 100644 --- a/src/advancedHMC_MCMC.jl +++ b/src/advancedHMC_MCMC.jl @@ -259,245 +259,4 @@ function kernelchoice(Kernel, MCMCkwargs) else # HMC Kernel(MCMCkwargs[:n_leapfrog]) end -end - -""" - ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining, dataset = [nothing], - init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0f0, - l2std = [0.05], phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0), - param = [], nchains = 1, autodiff = false, Kernel = HMC, - Adaptorkwargs = (Adaptor = StanHMCAdaptor, - Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), - Integratorkwargs = (Integrator = Leapfrog,), - MCMCkwargs = (n_leapfrog = 30,), progress = false, - verbose = false) - -!!! warn - - Note that `ahmc_bayesian_pinn_ode()` only supports ODEs which are written in the - out-of-place form, i.e. `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared - out-of-place, then `ahmc_bayesian_pinn_ode()` will exit with an error. 
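Relating to the new `lib/BayesianNeuralPDE/test/runtests.jl` added above: the `RETESTITEMS_NWORKERS` and `RETESTITEMS_NWORKER_THREADS` definitions are commented out there, yet the `@info` message and the `ReTestItems.runtests` call still reference them. A sketch of those definitions as they appear in the comments, which would need to be active (or replaced) for the test runner to work:

```julia
using Hwloc

const RETESTITEMS_NWORKERS = parse(
    Int, get(ENV, "RETESTITEMS_NWORKERS", string(min(Hwloc.num_physical_cores(), 4))))
const RETESTITEMS_NWORKER_THREADS = parse(Int,
    get(ENV, "RETESTITEMS_NWORKER_THREADS",
        string(max(Hwloc.num_virtual_cores() ÷ RETESTITEMS_NWORKERS, 1))))
```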
- -## Example - -```julia -linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t) -tspan = (0.0, 10.0) -u0 = 0.0 -p = [5.0, -5.0] -prob = ODEProblem(linear, u0, tspan, p) - -### CREATE DATASET (Necessity for accurate Parameter estimation) -sol = solve(prob, Tsit5(); saveat = 0.05) -u = sol.u[1:100] -time = sol.t[1:100] - -### dataset and BPINN create -x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u))) -dataset = [x̂, time] - -chain1 = Lux.Chain(Lux.Dense(1, 5, tanh), Lux.Dense(5, 5, tanh), Lux.Dense(5, 1) - -### simply solving ode here hence better to not pass dataset(uses ode params specified in prob) -fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chain1, - dataset = dataset, - draw_samples = 1500, - l2std = [0.05], - phystd = [0.05], - priorsNNw = (0.0,3.0)) - -### solving ode + estimating parameters hence dataset needed to optimize parameters upon + Pior Distributions for ODE params -fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chain1, - dataset = dataset, - draw_samples = 1500, - l2std = [0.05], - phystd = [0.05], - priorsNNw = (0.0,3.0), - param = [Normal(6.5,0.5), Normal(-3,0.5)]) -``` - -## NOTES - -Dataset is required for accurate Parameter estimation + solving equations -Incase you are only solving the Equations for solution, do not provide dataset - -## Positional Arguments - -* `prob`: DEProblem(out of place and the function signature should be f(u,p,t). -* `chain`: Lux Neural Netork which would be made the Bayesian PINN. - -## Keyword Arguments - -* `strategy`: The training strategy used to choose the points for the evaluations. By - default GridTraining is used with given physdt discretization. -* `init_params`: initial parameter values for BPINN (ideally for multiple chains different - initializations preferred) -* `nchains`: number of chains you want to sample -* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are - ~2/3 of draw samples) -* `l2std`: standard deviation of BPINN prediction against L2 losses/Dataset -* `phystd`: standard deviation of BPINN prediction against Chosen Underlying ODE System -* `phynewstd`: standard deviation of new loss func term -* `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of - BPINN are Normal Distributions by default. -* `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems. -* `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical) -* `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default) -* `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implementations HMC/NUTS/HMCDA) -* `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. - Refer: https://turinglang.org/AdvancedHMC.jl/stable/ -* `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. - Refer: https://turinglang.org/AdvancedHMC.jl/stable/ Note: Target percentage (in decimal) - of iterations in which the proposals are accepted (0.8 by default) -* `MCMCargs`: A NamedTuple containing all the chosen MCMC kernel's (HMC/NUTS/HMCDA) - Arguments, as follows : - * `n_leapfrog`: number of leapfrog steps for HMC - * `δ`: target acceptance probability for NUTS and HMCDA - * `λ`: target trajectory length for HMCDA - * `max_depth`: Maximum doubling tree depth (NUTS) - * `Δ_max`: Maximum divergence during doubling tree (NUTS) - Refer: https://turinglang.org/AdvancedHMC.jl/stable/ -* `progress`: controls whether to show the progress meter or not. 
-* `verbose`: controls the verbosity. (Sample call args in AHMC) - -!!! warning - - AdvancedHMC.jl is still developing convenience structs so might need changes on new - releases. -""" -function ahmc_bayesian_pinn_ode( - prob::SciMLBase.ODEProblem, chain; strategy = GridTraining, dataset = [nothing], - init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0, l2std = [0.05], - phystd = [0.05], phynewstd = [0.05], priorsNNw = (0.0, 2.0), param = [], nchains = 1, - autodiff = false, Kernel = HMC, - Adaptorkwargs = (Adaptor = StanHMCAdaptor, - Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), - Integratorkwargs = (Integrator = Leapfrog,), MCMCkwargs = (n_leapfrog = 30,), - progress = false, verbose = false, estim_collocate = false) - @assert !isinplace(prob) "The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t)." - - chain isa AbstractLuxLayer || (chain = FromFluxAdaptor()(chain)) - - strategy = strategy == GridTraining ? strategy(physdt) : strategy - - if dataset != [nothing] && - (length(dataset) < 2 || !(dataset isa Vector{<:Vector{<:AbstractFloat}})) - error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}") - end - - if dataset != [nothing] && param == [] - println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.") - elseif dataset == [nothing] && param != [] - error("Dataset Required for Parameter Estimation.") - end - - initial_nnθ, chain, st = generate_ltd(chain, init_params) - - @assert nchains≤Threads.nthreads() "number of chains is greater than available threads" - @assert nchains≥1 "number of chains must be greater than 1" - - # eltype(physdt) cause needs Float64 for find_good_stepsize - # Lux chain(using component array later as vector_to_parameter need namedtuple) - T = eltype(physdt) - initial_θ = getdata(ComponentArray{T}(initial_nnθ)) - - # adding ode parameter estimation - nparameters = length(initial_θ) - ninv = length(param) - priors = [ - MvNormal(T(priorsNNw[1]) * ones(T, nparameters), - Diagonal(abs2.(T(priorsNNw[2]) .* ones(T, nparameters)))) - ] - - # append Ode params to all paramvector - if ninv > 0 - # shift ode params(initialise ode params by prior means) - initial_θ = vcat(initial_θ, [Distributions.params(param[i])[1] for i in 1:ninv]) - priors = vcat(priors, param) - nparameters += ninv - end - - smodel = StatefulLuxLayer{true}(chain, nothing, st) - # dimensions would be total no of params,initial_nnθ for Lux namedTuples - ℓπ = LogTargetDensity(nparameters, prob, smodel, strategy, dataset, priors, - phystd, phynewstd, l2std, autodiff, physdt, ninv, initial_nnθ, estim_collocate) - - if verbose - @printf("Current Physics Log-likelihood: %g\n", physloglikelihood(ℓπ, initial_θ)) - @printf("Current Prior Log-likelihood: %g\n", priorweights(ℓπ, initial_θ)) - @printf("Current SSE against dataset Log-likelihood: %g\n", - L2LossData(ℓπ, initial_θ)) - if estim_collocate - @printf("Current gradient loss against dataset Log-likelihood: %g\n", - L2loss2(ℓπ, initial_θ)) - end - end - - Adaptor = Adaptorkwargs[:Adaptor] - Metric = Adaptorkwargs[:Metric] - targetacceptancerate = Adaptorkwargs[:targetacceptancerate] - - # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space) - metric = Metric(nparameters) - hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff) - - # parallel sampling option - if nchains != 1 - # Cache to store the chains - chains = Vector{Any}(undef, nchains) - statsc = Vector{Any}(undef, nchains) - 
samplesc = Vector{Any}(undef, nchains) - - Threads.@threads for i in 1:nchains - # each chain has different initial NNparameter values(better posterior exploration) - initial_θ = vcat( - randn(eltype(initial_θ), nparameters - ninv), - initial_θ[(nparameters - ninv + 1):end] - ) - initial_ϵ = find_good_stepsize(hamiltonian, initial_θ) - integrator = integratorchoice(Integratorkwargs, initial_ϵ) - adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric), - StepSizeAdaptor(targetacceptancerate, integrator)) - - MCMC_alg = kernelchoice(Kernel, MCMCkwargs) - Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator) - samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor; - progress = progress, verbose = verbose) - - samplesc[i] = samples - statsc[i] = stats - mcmc_chain = Chains(reduce(hcat, samples)') - chains[i] = mcmc_chain - end - - return chains, samplesc, statsc - else - initial_ϵ = find_good_stepsize(hamiltonian, initial_θ) - integrator = integratorchoice(Integratorkwargs, initial_ϵ) - adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric), - StepSizeAdaptor(targetacceptancerate, integrator)) - - MCMC_alg = kernelchoice(Kernel, MCMCkwargs) - Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator) - samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, - adaptor; progress = progress, verbose = verbose) - - if verbose - println("Sampling Complete.") - @printf("Final Physics Log-likelihood: %g\n", - physloglikelihood(ℓπ, samples[end])) - @printf("Final Prior Log-likelihood: %g\n", priorweights(ℓπ, samples[end])) - @printf("Final SSE against dataset Log-likelihood: %g\n", - L2LossData(ℓπ, samples[end])) - if estim_collocate - @printf("Final gradient loss against dataset Log-likelihood: %g\n", - L2loss2(ℓπ, samples[end])) - end - end - - # return a chain(basic chain),samples and stats - matrix_samples = reshape(hcat(samples...), (length(samples[1]), length(samples), 1)) - mcmc_chain = MCMCChains.Chains(matrix_samples) - return mcmc_chain, samples, stats - end -end +end \ No newline at end of file diff --git a/src/discretize.jl b/src/discretize.jl index 43653ba7c..e1778a3b9 100644 --- a/src/discretize.jl +++ b/src/discretize.jl @@ -523,85 +523,6 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, discretization::Ab return full_loss_function end - function get_likelihood_estimate_function(discretization::BayesianPINN) - dataset_pde, dataset_bc = discretization.dataset - - pde_loss_functions, bc_loss_functions = merge_strategy_with_loglikelihood_function( - pinnrep, strategy, - datafree_pde_loss_functions, datafree_bc_loss_functions) - - # required as Physics loss also needed on the discrete dataset domain points - # data points are discrete and so by default GridTraining loss applies - # passing placeholder dx with GridTraining, it uses data points irl - datapde_loss_functions, databc_loss_functions = if dataset_bc !== nothing || - dataset_pde !== nothing - merge_strategy_with_loglikelihood_function(pinnrep, GridTraining(0.1), - datafree_pde_loss_functions, datafree_bc_loss_functions, - train_sets_pde = dataset_pde, train_sets_bc = dataset_bc) - else - nothing, nothing - end - - # this includes losses from dataset domain points as well as discretization points - function full_loss_function(θ, allstd::Vector{Vector{Float64}}) - stdpdes, stdbcs, stdextra = allstd - # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them - # SSE FOR LOSS ON GRIDPOINTS not MSE ! 
i, j depend on number of bcs and eqs - pde_loglikelihoods = sum([pde_loglike_function(θ, stdpdes[i]) - for (i, pde_loglike_function) in enumerate(pde_loss_functions)]) - - bc_loglikelihoods = sum([bc_loglike_function(θ, stdbcs[j]) - for (j, bc_loglike_function) in enumerate(bc_loss_functions)]) - - # final newloss creation components are similar to this - if !(datapde_loss_functions isa Nothing) - pde_loglikelihoods += sum([pde_loglike_function(θ, stdpdes[j]) - for (j, pde_loglike_function) in enumerate(datapde_loss_functions)]) - end - - if !(databc_loss_functions isa Nothing) - bc_loglikelihoods += sum([bc_loglike_function(θ, stdbcs[j]) - for (j, bc_loglike_function) in enumerate(databc_loss_functions)]) - end - - # this is kind of a hack, and means that whenever the outer function is evaluated the increment goes up, even if it's not being optimized - # that's why we prefer the user to maintain the increment in the outer loop callback during optimization - @ignore_derivatives if self_increment - iteration[] += 1 - end - - @ignore_derivatives begin - reweight_losses_func(θ, pde_loglikelihoods, - bc_loglikelihoods) - end - - weighted_pde_loglikelihood = adaloss.pde_loss_weights .* pde_loglikelihoods - weighted_bc_loglikelihood = adaloss.bc_loss_weights .* bc_loglikelihoods - - sum_weighted_pde_loglikelihood = sum(weighted_pde_loglikelihood) - sum_weighted_bc_loglikelihood = sum(weighted_bc_loglikelihood) - weighted_loglikelihood_before_additional = sum_weighted_pde_loglikelihood + - sum_weighted_bc_loglikelihood - - full_weighted_loglikelihood = if additional_loss isa Nothing - weighted_loglikelihood_before_additional - else - (θ_, p_) = param_estim ? (θ.depvar, θ.p) : (θ, nothing) - _additional_loss = additional_loss(phi, θ_, p_) - _additional_loglikelihood = logpdf(Normal(0, stdextra), _additional_loss) - - weighted_additional_loglikelihood = adaloss.additional_loss_weights[1] * - _additional_loglikelihood - - weighted_loglikelihood_before_additional + weighted_additional_loglikelihood - end - - return full_weighted_loglikelihood - end - - return full_loss_function - end - full_loss_function = get_likelihood_estimate_function(discretization) pinnrep.loss_functions = PINNLossFunctions(bc_loss_functions, pde_loss_functions, full_loss_function, additional_loss, datafree_pde_loss_functions, diff --git a/src/pinn_types.jl b/src/pinn_types.jl index 6a7d617e9..33d992ba6 100644 --- a/src/pinn_types.jl +++ b/src/pinn_types.jl @@ -138,40 +138,6 @@ function PhysicsInformedNN( multioutput, kwargs) end -""" - BayesianPINN(args...; dataset = nothing, kwargs...) - -A `discretize` algorithm for the ModelingToolkit PDESystem interface, which transforms a -`PDESystem` into a likelihood function used for HMC based Posterior Sampling Algorithms -[AdvancedHMC.jl](https://turinglang.org/AdvancedHMC.jl/stable/) which is later optimized -upon to give the Solution Distribution of the PDE, using the Physics-Informed Neural -Networks (PINN) methodology. - -All positional arguments and keyword arguments are passed to `PhysicsInformedNN` except -the ones mentioned below. - -## Keyword Arguments - -* `dataset`: A vector of matrix, each matrix for ith dependant variable and first col in - matrix is for dependant variables, remaining columns for independent variables. Needed for - inverse problem solving. 
-""" -@concrete struct BayesianPINN <: AbstractPINN - pinn <: PhysicsInformedNN - dataset -end - -function Base.getproperty(pinn::BayesianPINN, name::Symbol) - name === :dataset && return getfield(pinn, :dataset) - name === :pinn && return getfield(pinn, :pinn) - return getproperty(pinn.pinn, name) -end - -function BayesianPINN(args...; dataset = nothing, kwargs...) - dataset === nothing && (dataset = (nothing, nothing)) - return BayesianPINN(PhysicsInformedNN(args...; kwargs...), dataset) -end - """ `PINNRepresentation``