diff --git a/.gitignore b/.gitignore
index 49d903c..6fb14f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@
 .depend
 bin
 source/build
+docs/build
diff --git a/config/make.gnu_openmpi b/config/make.gnu_openmpi
index 1069acc..67876fd 100644
--- a/config/make.gnu_openmpi
+++ b/config/make.gnu_openmpi
@@ -1,6 +1,6 @@
 # Fortran compiler configuration
 FC = mpif90
-FC_OPTS = -g -O3 -fdefault-real-8 -fdefault-double-8 -fopenmp -ffree-form 
+FC_OPTS = -Wa,-q -cpp -g -O3 -fdefault-real-8 -fdefault-double-8 -fopenmp -ffree-form 
 # -fbacktrace -fbounds-check
 # -fbounds-check
 # FC_OPTS = -g -Wall -Wextra -Wconversion -fimplicit-none -fbacktrace \
@@ -12,7 +12,7 @@ CC = gcc
 CC_OPTS = -O3 -std=c99
 
 # Preprocessor configuration
-FPP = gcc -C -P -E -cpp
+FPP = gcc -C -P -E -x assembler-with-cpp
 
 # Python 2.x configuration. Used for automatically generate compile rules
 PYTHON = python 
diff --git a/config/make.template.gnu b/config/make.template.gnu
index 93748f3..bec206e 100644
--- a/config/make.template.gnu
+++ b/config/make.template.gnu
@@ -10,7 +10,7 @@
 FC = mpif90
 
 # Configure the compiler options
-FC_OPTS = -g -O3 -fdefault-real-8 -fdefault-double-8 -fopenmp -ffree-form
+FC_OPTS = -Wa,-q -cpp -g -O3 -fdefault-real-8 -fdefault-double-8 -fopenmp -ffree-form
 
 # Enable the generation of run-time checks. The argument shall be a comma-delimited 
 # list of the following keywords:
@@ -45,7 +45,7 @@ CC = gcc
 CC_OPTS = -O3 -std=c99
 
 # Preprocessor configuration
-FPP = gcc -C -P -E -cpp
+FPP = gcc -C -P -E -x assembler-with-cpp 
 
 # ------------------------------------------------------------------------------
 # Configure the modules that will NOT be installed
diff --git a/source/Makefile b/source/Makefile
index 820e218..6b0b208 100644
--- a/source/Makefile
+++ b/source/Makefile
@@ -90,11 +90,13 @@ $(main_obj) : $(configfile) | $(builddir)
 $(builddir) :
 	@echo "Creating $@ directory"
 	@mkdir -p $@
+	@mkdir -p $@/preprocessed
 
 $(builddir)/%.o : %.f03
 	@echo "[F03] $(<F)"
 	@$(FPP) $(FPPF) -I. -I./$(builddir) $< > $(builddir)/$(<F)
-	@cd $(builddir) && $(FC) $(INCF) -c $(FC_OPTS) $(<F) -o $(@F) && rm $(<F)
+	@cd $(builddir) && $(FC) $(INCF) -c $(FC_OPTS) $(<F) 
+	@cd $(builddir) && mv $(<F) ./preprocessed
 
 $(builddir)/%.o : %.c
 	@echo "[CC] $(<F)"
diff --git a/source/fields/field_b_class.f03 b/source/fields/field_b_class.f03
index e87e164..2b6a527 100644
--- a/source/fields/field_b_class.f03
+++ b/source/fields/field_b_class.f03
@@ -112,7 +112,9 @@ subroutine init_field_b( this, opts, max_mode, part_shape, boundary, relax_fac,
 
   case ( p_ps_quadratic )
 
-    call write_err( "Quadratic particle shape not implemented." )
+    gc_num(:,1) = (/1, 2/)
+    gc_num(:,2) = (/0, 1/)
+    ! call write_err( "Quadratic particle shape not implemented." )
 
   case default
 
diff --git a/source/fields/field_e_class.f03 b/source/fields/field_e_class.f03
index 5dd8c65..7def024 100644
--- a/source/fields/field_e_class.f03
+++ b/source/fields/field_e_class.f03
@@ -88,7 +88,9 @@ subroutine init_field_e( this, opts, max_mode, part_shape, boundary, entity )
 
   case ( p_ps_quadratic )
 
-    call write_err( "Quadratic particle shape not implemented." )
+    gc_num(:,1) = (/1, 2/)
+    gc_num(:,2) = (/0, 1/)
+    ! call write_err( "Quadratic particle shape not implemented." )
 
   case default
 
diff --git a/source/fields/field_psi_class.f03 b/source/fields/field_psi_class.f03
index f50c3d3..b97cb1c 100644
--- a/source/fields/field_psi_class.f03
+++ b/source/fields/field_psi_class.f03
@@ -79,7 +79,10 @@ subroutine init_field_psi( this, opts, max_mode, part_shape, boundary )
 
   case ( p_ps_quadratic )
 
-    call write_err( "Quadratic particle shape not implemented." )
+    
+    gc_num(:,1) = (/1, 2/)
+    gc_num(:,2) = (/0, 1/)
+    ! call write_err( "Quadratic particle shape not implemented." )
 
   case default
 
diff --git a/source/fields/field_src_class.f03 b/source/fields/field_src_class.f03
index f408768..c099f7b 100644
--- a/source/fields/field_src_class.f03
+++ b/source/fields/field_src_class.f03
@@ -81,8 +81,11 @@ subroutine init_field_rho(this, opts, max_mode, part_shape, smooth_order, has_2d
       gc_num(:,2) = (/0, 1/)
 
     case ( p_ps_quadratic )
-      call write_err( "Quadratic particle shape not implemented." )
-
+        
+    !   call write_err( "Quadratic particle shape not implemented." )
+        gc_num(:,1) = (/1, 2/)
+        gc_num(:,2) = (/0, 2/)
+        
     case default
       call write_err( "Invalid particle shape." )
 
@@ -149,8 +152,11 @@ subroutine init_field_jay(this, opts, max_mode, part_shape, smooth_order)
       gc_num(:,2) = (/0, 1/)
 
     case ( p_ps_quadratic )
-
-      call write_err( "Quadratic particle shape not implemented." )
+        
+      gc_num(:,1) = (/1, 2/)
+      gc_num(:,2) = (/0, 2/)
+        
+    !   call write_err( "Quadratic particle shape not implemented." )
 
     case default
 
@@ -223,8 +229,11 @@ subroutine init_field_djdxi( this, opts, max_mode, part_shape, smooth_order )
     gc_num(:,2) = (/0, 1/)
 
   case ( p_ps_quadratic )
-
-    call write_err( "Quadratic particle shape not implemented." )
+    
+    gc_num(:,1) = (/1, 2/)
+    gc_num(:,2) = (/0, 1/)
+      
+    ! call write_err( "Quadratic particle shape not implemented." )
 
   case default
 
diff --git a/source/fields/field_vpot_class.f03 b/source/fields/field_vpot_class.f03
index 1f7f253..ab9a935 100644
--- a/source/fields/field_vpot_class.f03
+++ b/source/fields/field_vpot_class.f03
@@ -97,7 +97,9 @@ subroutine init_field_vpot( this, opts, max_mode, part_shape, boundary )
 
   case ( p_ps_quadratic )
 
-    call write_err( "Quadratic particle shape not implemented." )
+    gc_num(:,1) = (/1, 2/)
+    gc_num(:,2) = (/0, 1/)
+    ! call write_err( "Quadratic particle shape not implemented." )
 
   case default
 
diff --git a/source/interpolation.f03 b/source/interpolation.f03
index 1941650..a819a85 100644
--- a/source/interpolation.f03
+++ b/source/interpolation.f03
@@ -29,14 +29,20 @@ subroutine spline_quadratic( x, s )
   real, intent(in) :: x
   real, dimension(-1:1), intent(inout) :: s
   
-  real :: t0, t1
+! Previous work, unfinished
+!   real :: t0, t1
   
-  t0 = 1.0 - x
-  t1 = x
+!   t0 = 1.0 - x
+!   t1 = x
   
-  s(-1) = 0.5 * t0**2
-  s( 0) = 0.5 + t0*t1
-  s( 1) = 0.5 * t1**2
+!   s(-1) = 0.5 * t0**2
+!   s( 0) = 0.5 + t0*t1
+!   s( 1) = 0.5 * t1**2
+
+  ! the x is in [-0.5, 0.5)
+  s(-1) = 0.5*(0.5-x)**2
+  s( 0) = 3.0/4.0 - x**2
+  s( 1) = 0.5*(0.5+x)**2
 
 end subroutine spline_quadratic
 
diff --git a/source/sim_beams_class.f03 b/source/sim_beams_class.f03
index db1d423..839d4f7 100644
--- a/source/sim_beams_class.f03
+++ b/source/sim_beams_class.f03
@@ -95,8 +95,11 @@ subroutine init_sim_beams( this, input, opts )
   select case ( trim(str) )
   case ( 'linear' )
     ps = p_ps_linear
+  case ( 'quadratic' )
+    ps = p_ps_quadratic
+    call write_stdout('Attention! Quadratic interpolation for beams not implemented!')
   case default
-    call write_err( 'Invalid interpolation type! Only "linear" are supported &
+    call write_err( 'Invalid interpolation type! Only "linear" and "quadratic" are supported &
       &currently.' )
   end select
 
diff --git a/source/sim_fields_class.f03 b/source/sim_fields_class.f03
index f11266b..b80bd20 100644
--- a/source/sim_fields_class.f03
+++ b/source/sim_fields_class.f03
@@ -128,8 +128,11 @@ subroutine init_sim_fields( this, input, opts )
   select case ( trim(str) )
   case ( 'linear' )
     ps = p_ps_linear
+  case ( 'quadratic' )
+    ps = p_ps_quadratic
+    ! call write_stdout('Attention! Quadratic interpolation for fields not implemented!')
   case default
-    call write_err( 'Invalid interpolation type! Only "linear" are supported currently.' )
+    call write_err( 'Invalid interpolation type! Only "linear" and "quadratic" are supported currently.' )
   end select
 
   relax_fac = 1.0d-3
diff --git a/source/sim_lasers_class.f03 b/source/sim_lasers_class.f03
index bda4951..84ba1b4 100644
--- a/source/sim_lasers_class.f03
+++ b/source/sim_lasers_class.f03
@@ -91,6 +91,7 @@ subroutine init_sim_lasers( this, input, opts )
   ! local data
   character(len=32), save :: sname = 'init_sim_lasers'
   character(len=:), allocatable :: cym_str
+  character(len=:), allocatable :: interp_str
   integer :: max_mode, iter, i, m, rst_timestep
   integer, dimension(2,2) :: gc_num
   real :: k0 
@@ -104,10 +105,20 @@ subroutine init_sim_lasers( this, input, opts )
   call input%get( 'simulation.read_restart', read_rst )
   call kwargs%append( 'iter', 0 )
   call kwargs%append( 'k0', 10.0 )
-
-  gc_num(:,1) = (/1, 1/)
-  gc_num(:,2) = (/2, 1/)
-
+  
+  call input%get ( 'simulation.interpolation', interp_str)
+  select case ( trim(interp_str) )
+  case ( 'linear' )
+    gc_num(:,1) = (/1, 1/)
+    gc_num(:,2) = (/2, 1/)
+  case ( 'quadratic' )
+    gc_num(:,1) = (/1, 2/)
+    gc_num(:,2) = (/2, 1/)
+  case default
+    call write_err( 'Invalid interpolation type! Only "linear" and "quadratic" are supported &
+      &currently.' )
+  end select
+   
   do i = 1, this%num_lasers
     call input%get( 'laser(' // num2str(i) // ').iteration', iter )
     call input%get( 'laser(' // num2str(i) // ').k0', k0 )
diff --git a/source/sim_plasma_class.f03 b/source/sim_plasma_class.f03
index 67ab8fd..cc56cff 100644
--- a/source/sim_plasma_class.f03
+++ b/source/sim_plasma_class.f03
@@ -88,8 +88,11 @@ subroutine init_sim_plasma( this, input, opts, s )
   select case ( trim(str) )
   case ( 'linear' )
     ps = p_ps_linear
+  case ( 'quadratic' )
+    ps = p_ps_quadratic
+    call write_stdout('Attention! Quadratic interpolation for plasma not finised!')
   case default
-    call write_err( 'Invalid interpolation type! Only "linear" are supported currently.' )
+    call write_err( 'Invalid interpolation type! Only "linear" and "quadratic" are supported currently.' )
   end select
 
   ! initialize profiles of species and neutrals
@@ -170,7 +173,7 @@ subroutine init_sim_plasma( this, input, opts, s )
       call input%get( 'neutrals('//num2str(i)//').smooth_order', sm_ord )
     endif
 
-    call this%neut(i)%new( opts, this%pf_neut(i), max_mode, elem, ion_max, &
+    call this%neut(i)%new( opts, this%pf_neut(i), ps, max_mode, elem, ion_max, &
       qbm, omega_p, s, push_type, sm_ord )
 
   enddo
diff --git a/source/species/interp_part2d.f03 b/source/species/interp_part2d.f03
index 383d35b..20bb3aa 100644
--- a/source/species/interp_part2d.f03
+++ b/source/species/interp_part2d.f03
@@ -8,13 +8,15 @@ module interp_part2d
 
 private
 
-interface gen_interp_info
-    module procedure gen_interp_info_part2d
-end interface
+! interface gen_interp_info
+!     module procedure gen_interp_info_part2d_s1
+!     module procedure gen_interp_info_part2d_s2
+! end interface
 
 interface interp_field
     module procedure interp_field_part2d_scalar
     module procedure interp_field_part2d_vector
+
 end interface
 
 interface transform_to_cartesian
@@ -25,51 +27,88 @@ module interp_part2d
 
 contains
 
-subroutine gen_interp_info_part2d(x, dr, noff, np, ptrcur, weight, idx, pcos, psin)
-
+subroutine gen_interp_info(x, dr, noff, np, ptrcur, weight, idx, pcos, psin)
     implicit none
     real, dimension(:, :), intent(in) :: x
     real, intent(in) :: dr
     integer, intent(in) :: noff, np
     integer(kind=LG), intent(in) :: ptrcur
-    real, dimension(0:, :), intent(out) :: weight
+    real, dimension(:, :), intent(inout), allocatable :: weight
     integer, dimension(:), intent(out) :: idx
     real, dimension(:), intent(out) :: pcos, psin
-
+    
     integer :: i
     integer(kind=LG) :: pp
     real :: pos, idr
-    real, dimension(0:1) :: wt
-
+    real, dimension(:), allocatable :: wt
+    
     idr = 1.0 / dr
-
+    allocate(wt(size(weight)))
     pp = ptrcur
-    do i = 1, np
-
-        pos = sqrt(x(1, pp)**2 + x(2, pp)**2)
-
-        ! cosine and sine
-        pcos(i) = x(1, pp) / pos
-        psin(i) = x(2, pp) / pos
-
-        pos = pos * idr
-        idx(i) = int(pos) - noff + 1
-        pos = pos - int(pos)
-        call spline_linear(pos, wt)
-        weight(0:1, i) = wt
-
-        pp = pp + 1
+    if (lbound(weight, 1) == 0) then
+        do i = 1, np
+    
+            pos = sqrt(x(1, pp)**2 + x(2, pp)**2)
+    
+            ! cosine and sine
+            pcos(i) = x(1, pp) / pos
+            psin(i) = x(2, pp) / pos
+    
+            pos = pos * idr
+            idx(i) = floor(pos) - noff + 1
+            pos = pos - floor(pos)
+            call spline_linear(pos, wt)
+            weight(0:1, i) = wt
+    
+            pp = pp + 1
+    
+        enddo
 
-    enddo
+    else if (lbound(weight, 1) == -1) then
+        do i = 1, np
+    
+            pos = sqrt(x(1, pp)**2 + x(2, pp)**2)
+    
+            ! cosine and sine
+            pcos(i) = x(1, pp) / pos
+            psin(i) = x(2, pp) / pos
+    
+            pos = pos * idr
+            idx(i) = floor(pos) - noff + 1
+            pos = pos - floor(pos)
+            idx(i) = idx(i) + floor(pos - 0.5) + 1
+            pos = pos - floor(pos-0.5) - 1
+
+            call spline_quadratic(pos, wt)
+            weight(-1:1, i) = wt
+    
+            pp = pp + 1
+    
+        enddo
+        
+        if (noff == 0) then
+            do i = 1, np
+                ! It is inevitable
+                if (idx(i) == 1) then
+                    weight( -1, i ) = 0
+                    weight(  0, i ) = weight( 0, i ) + weight( -1, i )
+                endif
+            enddo
+        endif
+        
+    else
+        print *, 'Invalid bounds of weight'
+    end if
+    
+end subroutine gen_interp_info
 
-end subroutine gen_interp_info_part2d
 
 subroutine interp_field_part2d_vector(f_re, f_im, max_mode, weight, idx, pcos, psin, np, fp)
 
     implicit none
     type(ufield), dimension(:), pointer, intent(in) :: f_re, f_im
     integer, intent(in) :: max_mode, np
-    real, dimension(0:, :), intent(in) :: weight
+    real, dimension(:, :), intent(inout), allocatable :: weight
     integer, dimension(:), intent(in) :: idx
     real, dimension(:), intent(in) :: pcos, psin
     real, dimension(:, :), intent(inout) :: fp
@@ -81,31 +120,59 @@ subroutine interp_field_part2d_vector(f_re, f_im, max_mode, weight, idx, pcos, p
 
     fp = 0.0
     phase = cmplx(1.0, 0.0)
-
-    ! interpolate m = 0 mode
-    ptr_f_re => f_re(0)%get_f1()
-    do i = 1, np
-        do j = 0, 1
-            fp(:, i) = fp(:, i) + ptr_f_re(:, idx(i) + j) * weight(j, i)
+    
+    if (lbound(weight, 1) == 0) then
+        ! interpolate m = 0 mode
+        ptr_f_re => f_re(0)%get_f1()
+        do i = 1, np
+            do j = 0, 1
+                fp(:, i) = fp(:, i) + ptr_f_re(:, idx(i) + j) * weight(j, i)
+            enddo
         enddo
-    enddo
-
-    ! interpolate m > 0 modes
-    do mode = 1, max_mode
-        ptr_f_re => f_re(mode)%get_f1()
-        ptr_f_im => f_im(mode)%get_f1()
 
+        ! interpolate m > 0 modes
+        do mode = 1, max_mode
+            ptr_f_re => f_re(mode)%get_f1()
+            ptr_f_im => f_im(mode)%get_f1()
+
+            do i = 1, np
+                phase(i) = phase(i) * cmplx(pcos(i), psin(i))
+                phase_re = 2.0 * real(phase(i))
+                phase_im = 2.0 * aimag(phase(i))
+                do j = 0, 1
+                    fp(:, i) = fp(:, i) + (ptr_f_re(:, idx(i) + j) * phase_re - &
+                                        ptr_f_im(:, idx(i) + j) * phase_im) * weight(j, i)
+                enddo
+            enddo
+        enddo
+    else if (lbound(weight, 1) == -1) then
+        ! interpolate m = 0 mode
+        ptr_f_re => f_re(0)%get_f1()
         do i = 1, np
-            phase(i) = phase(i) * cmplx(pcos(i), psin(i))
-            phase_re = 2.0 * real(phase(i))
-            phase_im = 2.0 * aimag(phase(i))
-            do j = 0, 1
-                fp(:, i) = fp(:, i) + (ptr_f_re(:, idx(i) + j) * phase_re - &
-                                       ptr_f_im(:, idx(i) + j) * phase_im) * weight(j, i)
+            do j = -1, 1
+                fp(:, i) = fp(:, i) + ptr_f_re(:, idx(i) + j) * weight(j, i)
             enddo
         enddo
-    enddo
 
+        ! interpolate m > 0 modes
+        do mode = 1, max_mode
+            ptr_f_re => f_re(mode)%get_f1()
+            ptr_f_im => f_im(mode)%get_f1()
+
+            do i = 1, np
+                phase(i) = phase(i) * cmplx(pcos(i), psin(i))
+                phase_re = 2.0 * real(phase(i))
+                phase_im = 2.0 * aimag(phase(i))
+                do j = -1, 1
+                    fp(:, i) = fp(:, i) + (ptr_f_re(:, idx(i) + j) * phase_re - &
+                                        ptr_f_im(:, idx(i) + j) * phase_im) * weight(j, i)
+                enddo
+            enddo
+        enddo
+    else
+        print *, 'Invalid lower bound of weight'
+    end if
+    
 end subroutine interp_field_part2d_vector
 
 subroutine interp_field_part2d_scalar(f_re, f_im, max_mode, weight, idx, pcos, psin, np, fp)
@@ -113,7 +180,7 @@ subroutine interp_field_part2d_scalar(f_re, f_im, max_mode, weight, idx, pcos, p
     implicit none
     type(ufield), dimension(:), pointer, intent(in) :: f_re, f_im
     integer, intent(in) :: max_mode, np
-    real, dimension(0:, :), intent(in) :: weight
+    real, dimension(:, :), intent(inout), allocatable :: weight
     integer, dimension(:), intent(in) :: idx
     real, dimension(:), intent(in) :: pcos, psin
     real, dimension(:), intent(inout) :: fp
@@ -126,32 +193,61 @@ subroutine interp_field_part2d_scalar(f_re, f_im, max_mode, weight, idx, pcos, p
     fp = 0.0
     phase = cmplx(1.0, 0.0)
 
-    ! interpolate m = 0 mode
-    ptr_f_re => f_re(0)%get_f1()
-    do i = 1, np
-        do j = 0, 1
-            fp(i) = fp(i) + ptr_f_re(1, idx(i) + j) * weight(j, i)
+    if (lbound(weight, 1) == 0) then
+        ! interpolate m = 0 mode
+        ptr_f_re => f_re(0)%get_f1()
+        do i = 1, np
+            do j = 0, 1
+                fp(i) = fp(i) + ptr_f_re(1, idx(i) + j) * weight(j, i)
+            enddo
         enddo
-    enddo
-
-    ! interpolate m > 0 modes
-    do mode = 1, max_mode
-        ptr_f_re => f_re(mode)%get_f1()
-        ptr_f_im => f_im(mode)%get_f1()
 
+        ! interpolate m > 0 modes
+        do mode = 1, max_mode
+            ptr_f_re => f_re(mode)%get_f1()
+            ptr_f_im => f_im(mode)%get_f1()
+
+            do i = 1, np
+                phase(i) = phase(i) * cmplx(pcos(i), psin(i))
+                phase_re = 2.0 * real(phase(i))
+                phase_im = 2.0 * aimag(phase(i))
+                do j = 0, 1                                                             
+                    fp(i) = fp(i) + (ptr_f_re(1, idx(i) + j) * phase_re - &
+                                    ptr_f_im(1, idx(i) + j) * phase_im) * weight(j, i)
+                enddo
+            enddo
+        enddo
+    else if (lbound(weight, 1) == -1) then
+    ! interpolate m = 0 mode
+        ptr_f_re => f_re(0)%get_f1()
         do i = 1, np
-            phase(i) = phase(i) * cmplx(pcos(i), psin(i))
-            phase_re = 2.0 * real(phase(i))
-            phase_im = 2.0 * aimag(phase(i))
-            do j = 0, 1                                                             
-                fp(i) = fp(i) + (ptr_f_re(1, idx(i) + j) * phase_re - &
-                                 ptr_f_im(1, idx(i) + j) * phase_im) * weight(j, i)
+            do j = -1, 1
+                fp(i) = fp(i) + ptr_f_re(1, idx(i) + j) * weight(j, i)
             enddo
         enddo
-    enddo
-
+    
+        ! interpolate m > 0 modes
+        do mode = 1, max_mode
+            ptr_f_re => f_re(mode)%get_f1()
+            ptr_f_im => f_im(mode)%get_f1()
+    
+            do i = 1, np
+                phase(i) = phase(i) * cmplx(pcos(i), psin(i))
+                phase_re = 2.0 * real(phase(i))
+                phase_im = 2.0 * aimag(phase(i))
+                do j = -1, 1                                                             
+                    fp(i) = fp(i) + (ptr_f_re(1, idx(i) + j) * phase_re - &
+                                     ptr_f_im(1, idx(i) + j) * phase_im) * weight(j, i)
+                enddo
+            enddo
+        enddo
+    else
+        print *, 'Invalid lower bound of weight'
+    end if
+    
 end subroutine interp_field_part2d_scalar
 
+
 subroutine transform_to_cartesian(fp, np, pcos, psin)
 
     implicit none
diff --git a/source/species/neutral_class.f03 b/source/species/neutral_class.f03
index f8d9f02..401812e 100644
--- a/source/species/neutral_class.f03
+++ b/source/species/neutral_class.f03
@@ -339,6 +339,7 @@ module neutral_class
   real :: wp, dt
   ! pusher type
   integer :: push_type
+  integer :: part_shape
   
   double precision, dimension(:,:), pointer :: rate_param
 
@@ -405,18 +406,19 @@ subroutine alloc_neutral( this )
 
 end subroutine alloc_neutral
 
-subroutine init_neutral( this, opts, pf, max_mode, elem, max_e, qbm, wp, s, &
+subroutine init_neutral( this, opts, pf, part_shape, max_mode, elem, max_e, qbm, wp, s, &
   push_type, smooth_order )
 ! element is atomic number. e.g.: For Li, element = 3
 ! max_e is the maximum number of electrons that the programmer allow the atom
 ! to lose due to the ionization. It should be less or equal to element
+! only linear interpolation is used currently
    
   implicit none
 
   class(neutral), intent(inout) :: this
   type(options), intent(in) :: opts
   class(fdist2d), intent(inout), target :: pf
-  integer, intent(in) :: max_mode, elem, max_e, push_type
+  integer, intent(in) :: part_shape, max_mode, elem, max_e, push_type
   real, intent(in) :: qbm, wp, s
   integer, intent(in), optional :: smooth_order
 
@@ -431,21 +433,22 @@ subroutine init_neutral( this, opts, pf, max_mode, elem, max_e, qbm, wp, s, &
   this%wp = wp
   this%dt = opts%get_dxi()
   this%push_type = push_type
+  this%part_shape = part_shape
 
   allocate( this%q, this%cu, this%amu, this%dcu, this%rho_ion, this%rho_ion_add )
 
   smth_ord = 0
   if ( present(smooth_order) ) smth_ord = smooth_order
-  call this%q%new( opts, max_mode, p_ps_linear, smth_ord )
-  call this%rho_ion%new( opts, max_mode, p_ps_linear, smth_ord )
-  call this%rho_ion_add%new( opts, max_mode, p_ps_linear, smth_ord, has_2d=.false. )
-  call this%cu%new( opts, max_mode, p_ps_linear, smth_ord )
-  call this%dcu%new( opts, max_mode, p_ps_linear, smth_ord )
-  call this%amu%new( opts, max_mode, p_ps_linear, smth_ord )
-
-  call this%part%new( opts, pf, qbm, this%dt, s, if_empty=.true. )
-  call this%part_add%new( opts, pf, qbm, this%dt, s )
-
+  call this%q%new( opts, max_mode, this%part_shape, smth_ord )
+  call this%rho_ion%new( opts, max_mode, this%part_shape, smth_ord )
+  call this%rho_ion_add%new( opts, max_mode, this%part_shape, smth_ord, has_2d=.false. )
+  call this%cu%new( opts, max_mode, this%part_shape, smth_ord )
+  call this%dcu%new( opts, max_mode, this%part_shape, smth_ord )
+  call this%amu%new( opts, max_mode, this%part_shape, smth_ord )
+
+  call this%part%new( opts, pf, qbm, this%dt, s, this%part_shape, if_empty=.true. )
+  call this%part_add%new( opts, pf, qbm, this%dt, s, this%part_shape )
+ 
   this%q  = 0.0
   this%cu = 0.0
   this%rho_ion = 0.0
@@ -1186,7 +1189,7 @@ function get_multi_max(this)
 
 end function get_multi_max
 
-subroutine init_part2d_buf( this, opts, pf, qbm, dt, s, if_empty )
+subroutine init_part2d_buf( this, opts, pf, qbm, dt, s, part_shape, if_empty )
 
   implicit none
 
@@ -1194,6 +1197,7 @@ subroutine init_part2d_buf( this, opts, pf, qbm, dt, s, if_empty )
   type(options), intent(in) :: opts
   class(fdist2d), intent(inout) :: pf
   real, intent(in) :: qbm, dt, s
+  integer, intent(in) :: part_shape
   logical, intent(in), optional :: if_empty
 
   ! local data
@@ -1211,7 +1215,8 @@ subroutine init_part2d_buf( this, opts, pf, qbm, dt, s, if_empty )
 
   this%dr   = opts%get_dr()
   this%edge = opts%get_nd(1) * this%dr
-
+  this%part_shape = part_shape
+  
   ! only allocate position and charge, no need to initialize with profile
   allocate( this%x( 2, this%npmax ), this%q( this%npmax ) )
 
diff --git a/source/species/part2d_class.f03 b/source/species/part2d_class.f03
index 2a4a07b..1274d83 100644
--- a/source/species/part2d_class.f03
+++ b/source/species/part2d_class.f03
@@ -1,3 +1,5 @@
+#ifndef __TEMPLATE__
+
 module part2d_class
 
 use param
@@ -50,7 +52,8 @@ module part2d_class
   ! particle buffer
   real, dimension(:), allocatable :: pbuf
   ! on-axis deposition correction factor
-  real :: deposit_ax_corr
+  real, dimension(1,2) :: deposit_ax_corr
+  integer :: part_shape
 
   ! temporary arrays used for buffer reallocation
   real, private, dimension(:), allocatable :: tmp1
@@ -97,7 +100,7 @@ module part2d_class
 
 contains
 
-subroutine init_part2d( this, opts, pf, qbm, dt, s, if_empty )
+subroutine init_part2d( this, opts, pf, qbm, dt, s, part_shape, if_empty )
 
    implicit none
 
@@ -105,8 +108,8 @@ subroutine init_part2d( this, opts, pf, qbm, dt, s, if_empty )
    type(options), intent(in) :: opts
    class(fdist2d), intent(inout) :: pf
    real, intent(in) :: qbm, dt, s
+   integer, intent(in) :: part_shape
    logical, intent(in), optional :: if_empty
-
    ! local data
    character(len=18), save :: sname = 'init_part2d'
    integer :: npmax
@@ -124,7 +127,8 @@ subroutine init_part2d( this, opts, pf, qbm, dt, s, if_empty )
 
    this%dr   = opts%get_dr()
    this%edge = opts%get_nd(1) * this%dr
-   this%deposit_ax_corr = get_deposit_ax_corr( pf%ppc(1) )
+   this%part_shape = part_shape
+   this%deposit_ax_corr = get_deposit_ax_corr( pf%ppc(1), part_shape )
    
    allocate( this%x( 2, npmax ) )
    allocate( this%p( p_p_dim, npmax ) )
@@ -229,161 +233,457 @@ subroutine renew_part2d( this, pf, s, if_empty )
 end subroutine renew_part2d
 
 subroutine qdeposit_part2d( this, q )
-! deposit the charge density (rho - Jz)
+    implicit none
+    class(part2d), intent(in) :: this
+    class(field), intent(in) :: q
+    
+    select case (this%part_shape)
+    case(p_ps_linear)
+        call qdeposit_part2d_s1(this, q)
+    case(p_ps_quadratic)
+        call qdeposit_part2d_s2(this, q)
+    case default
+        call write_err('Not implemented yet')
+    end select
+    
+end subroutine qdeposit_part2d
 
-  implicit none
+subroutine deposit_chi_part2d( this, chi)
+    implicit none
+    class(part2d), intent(in) :: this
+    class(field), intent(inout) :: chi
+    
+    select case (this%part_shape)
+    case(p_ps_linear)
+        call deposit_chi_part2d_s1(this, chi)
+    case(p_ps_quadratic)
+        call deposit_chi_part2d_s2(this, chi)
+    case default
+        call write_err('Not implemented yet')
+    end select
+    
+end subroutine deposit_chi_part2d
 
-  class(part2d), intent(in) :: this
-  class(field), intent(in) :: q
-  ! local data
-  type(ufield), dimension(:), pointer :: q_re => null(), q_im => null()
-  complex(kind=DB), dimension(p_cache_size) :: phase0
-  complex(kind=DB) :: phase
-  real, dimension(:,:), pointer :: q0 => null(), qr => null(), qi => null()
-  real, dimension(0:1) :: wt ! interpolation weight
-  real, dimension(p_cache_size) :: pos ! normalized position
-  real :: idr, ir
-  integer(kind=LG) :: ptrcur, pp
-  integer :: i, j, nn, noff, nrp, np, mode, max_mode
+subroutine amjdeposit_std_part2d(this, ef, bf, cu, amu, dcu, dt_)
+    ! deposit the current, acceleration and momentum flux
+    
+    implicit none
+    
+    class(part2d), intent(inout) :: this
+    class(field), intent(in) :: cu, amu, dcu
+    class(field), intent(in) :: ef, bf
+    real, intent(in), optional :: dt_
+
+    select case(this%part_shape)
+    case(p_ps_linear)
+      call amjdeposit_std_part2d_s1(this, ef, bf, cu, amu, dcu, dt_)
+    case(p_ps_quadratic)
+      call amjdeposit_std_part2d_s2(this, ef, bf, cu, amu, dcu, dt_)
+    case default
+      call write_err('Not implemented yet')
+    end select
+  
+end subroutine amjdeposit_std_part2d
 
-  character(len=18), save :: sname = 'qdeposit_part2d'
+subroutine amjdeposit_robust_part2d(this, ef, bf, cu, amu, dcu, dt_)
+    ! deposit the current, acceleration and momentum flux
+    
+    implicit none
+    
+    class(part2d), intent(inout) :: this
+    class(field), intent(in) :: cu, amu, dcu
+    class(field), intent(in) :: ef, bf
+    real, intent(in), optional :: dt_
+
+    select case(this%part_shape)
+    case(p_ps_linear)
+      call amjdeposit_robust_part2d_s1(this, ef, bf, cu, amu, dcu, dt_)
+    case(p_ps_quadratic)
+      call amjdeposit_robust_part2d_s2(this, ef, bf, cu, amu, dcu, dt_)
+    case default
+      call write_err('Not implemented yet')
+    end select
+  
+end subroutine amjdeposit_robust_part2d
 
-  call write_dbg(cls_name, sname, cls_level, 'starts')
-  call start_tprof( 'deposit 2D particles' )
 
-  idr = 1.0 / this%dr
+subroutine amjdeposit_std_pgc_part2d(this, ef, bf, af, cu, amu, dcu, dt_)
+    ! deposit the current, acceleration and momentum flux
+    
+      implicit none
+    
+      class(part2d), intent(inout) :: this
+      class(field), intent(in) :: cu, amu, dcu
+      class(field), intent(in) :: ef, bf
+      class(field_laser), intent(in) :: af
+      real, intent(in), optional :: dt_
+      
+      select case(this%part_shape)
+      case(p_ps_linear)
+        call amjdeposit_std_pgc_part2d_s1(this, ef, bf, af, cu, amu, dcu, dt_)
+      case(p_ps_quadratic)
+        call amjdeposit_std_pgc_part2d_s2(this, ef, bf, af, cu, amu, dcu, dt_)
+      case default
+        call write_err('Not implemented yet')
+      end select
+      
+    end subroutine amjdeposit_std_pgc_part2d
+    
+subroutine amjdeposit_robust_pgc_part2d(this, ef, bf, af, cu, amu, dcu, dt_)
+! deposit the current, acceleration and momentum flux
 
-  q_re => q%get_rf_re()
-  q_im => q%get_rf_im()
+  implicit none
 
-  max_mode = q%get_max_mode()
+  class(part2d), intent(inout) :: this
+  class(field), intent(in) :: cu, amu, dcu
+  class(field), intent(in) :: ef, bf
+  class(field_laser), intent(in) :: af
+  real, intent(in), optional :: dt_
+  
+  select case(this%part_shape)
+  case(p_ps_linear)
+    call amjdeposit_robust_pgc_part2d_s1(this, ef, bf, af, cu, amu, dcu, dt_)
+  case(p_ps_quadratic)
+    call amjdeposit_robust_pgc_part2d_s2(this, ef, bf, af, cu, amu, dcu, dt_)
+  case default
+    call write_err('Not implemented yet')
+  end select
+  
+end subroutine amjdeposit_robust_pgc_part2d
 
-  noff = q_re(0)%get_noff(1)
-  nrp  = q_re(0)%get_ndp(1)
-  q0   => q_re(0)%get_f1()
+! subroutine interp_emf_part2d( ef_re, ef_im, bf_re, bf_im, max_mode, x, dr, bp, ep, np, ptrcur, &
+!   geom, weight, ix, pcos, psin )
+
+!   implicit none
+
+!   type(ufield), dimension(:), pointer, intent(in) :: ef_re, ef_im, bf_re, bf_im
+!   integer, intent(in) :: max_mode, np, geom
+!   real, intent(in) :: dr
+!   real, dimension(:,:), intent(in) :: x
+!   real, dimension(:,:), intent(inout) :: bp, ep
+!   integer(kind=LG), intent(in) :: ptrcur
+!   real, dimension(:,:), intent(out), optional :: weight
+!   integer, dimension(:), intent(out), optional :: ix
+!   real, dimension(:), intent(out), optional :: pcos, psin
+
+!   real, dimension(:,:), pointer :: e0, b0, er, ei, br, bi
+!   integer :: noff, i, j, nn, mode
+!   integer(kind=LG) :: pp
+!   real :: pos, idr, ph_r, ph_i, cc, ss
+!   real, dimension(0:1) :: wt
+!   complex(kind=DB) :: phase0, phase
+
+!   idr = 1.0 / dr
+!   noff = ef_re(0)%get_noff(1)
+
+!   e0 => ef_re(0)%get_f1()
+!   b0 => bf_re(0)%get_f1()
+
+!   ep = 0.0
+!   bp = 0.0
+
+!   pp = ptrcur
+!   do i = 1, np
+!     pos = sqrt( x(1,pp)**2 + x(2,pp)**2 ) * idr
+!     ! cosine and sine
+!     cc = x(1,pp) / pos * idr
+!     ss = x(2,pp) / pos * idr
+!     phase0 = cmplx( cc, ss )
+
+!     ! in-cell position
+!     nn  = int( pos )
+!     pos = pos - real(nn)
+
+!     ! cell index
+!     nn = nn - noff + 1
+
+!     ! get interpolation weight factor
+!     call spline_linear( pos, wt )
+
+!     if ( present(weight) ) weight(:,i) = wt
+!     if ( present(ix) ) ix(i) = nn
+!     if ( present(pcos) ) then
+!       pcos(i) = cc
+!       psin(i) = ss
+!     endif
+
+!     ! interpolate m=0 mode
+!     do j = 0, 1
+!       ep(:,i) = ep(:,i) + e0(:,nn+j) * wt(j)
+!       bp(:,i) = bp(:,i) + b0(:,nn+j) * wt(j)
+!     enddo
+
+!     ! interpolate m>0 modes
+!     phase = cmplx( 1.0, 0.0 )
+!     do mode = 1, max_mode
+!         phase = phase * phase0
+!         ph_r = 2.0 * real(phase)
+!         ph_i = 2.0 * aimag(phase)
+
+!         er => ef_re(mode)%get_f1()
+!         ei => ef_im(mode)%get_f1()
+!         br => bf_re(mode)%get_f1()
+!         bi => bf_im(mode)%get_f1()
+
+!         do j = 0, 1
+!           ep(:,i) = ep(:,i) + ( er(:,nn+j) * ph_r - ei(:,nn+j) * ph_i ) * wt(j)
+!           bp(:,i) = bp(:,i) + ( br(:,nn+j) * ph_r - bi(:,nn+j) * ph_i ) * wt(j)
+!         enddo
+!     enddo
+
+!     ! transform from cylindrical geometry to Cartesian geometry
+!     if ( geom == p_cartesian ) then
+!       ! ph_r, ph_i are temporary variables here
+!       ph_r = ep(1,i) * cc - ep(2,i) * ss
+!       ph_i = ep(1,i) * ss + ep(2,i) * cc
+!       ep(1,i) = ph_r
+!       ep(2,i) = ph_i
+
+!       ph_r = bp(1,i) * cc - bp(2,i) * ss
+!       ph_i = bp(1,i) * ss + bp(2,i) * cc
+!       bp(1,i) = ph_r
+!       bp(2,i) = ph_i
+!     endif
+
+!     pp = pp + 1
+!   enddo
+
+! end subroutine interp_emf_part2d
+
+! subroutine interp_laser_part2d( ar_re, ar_im, ai_re, ai_im, ar_grad_re, ar_grad_im, &
+!   ai_grad_re, ai_grad_im, max_mode, x, dr, apr, api, apr_grad, api_grad, np, ptrcur, geom )
+
+!    implicit none
+
+!    type(ufield), dimension(:), pointer, intent(in) :: ar_re, ar_im, ai_re, ai_im
+!    type(ufield), dimension(:), pointer, intent(in) :: ar_grad_re, ar_grad_im, ai_grad_re, ai_grad_im
+!    integer, intent(in) :: max_mode, np, geom
+!    real, intent(in) :: dr
+!    real, dimension(:,:), intent(in) :: x
+!    real, dimension(:), intent(inout) :: apr, api
+!    real, dimension(:,:), intent(inout) :: apr_grad, api_grad
+!    integer(kind=LG), intent(in) :: ptrcur
+
+!    integer :: noff, i, j, nn, m
+!    integer(kind=LG) :: pp
+!    real :: pos, idr, ph_r, ph_i, cc, ss
+!    real, dimension(0:1) :: wt
+!    complex :: phase0, phase
+
+!    idr = 1.0 / dr
+!    noff = ar_re(0)%get_noff(1)
+!    apr = 0.0
+!    api = 0.0
+!    apr_grad = 0.0
+!    api_grad = 0.0
+
+!    pp = ptrcur
+!    do i = 1, np
+!       pos = sqrt( x(1,pp)**2 + x(2,pp)**2 ) * idr
+!       ! cosine and sine
+!       cc = x(1,pp) / pos * idr
+!       ss = x(2,pp) / pos * idr
+!       phase0 = cmplx( cc, ss )
+
+!       ! in-cell position
+!       nn  = int( pos )
+!       pos = pos - real(nn)
+
+!       ! cell index
+!       nn = nn - noff + 1
+
+!       ! get interpolation weight factor
+!       call spline_linear( pos, wt )
+
+!       ! interpolate m = 0 mode
+!       do j = 0, 1
+!         apr(i) = apr(i) + ar_re(0)%f1(1,nn+j) * wt(j)
+!         api(i) = api(i) + ai_re(0)%f1(1,nn+j) * wt(j)
+!         apr_grad(:,i) = apr_grad(:,i) + ar_grad_re(0)%f1(:,nn+j) * wt(j)
+!         api_grad(:,i) = api_grad(:,i) + ai_grad_re(0)%f1(:,nn+j) * wt(j)
+!       enddo
+
+!       ! interpolate m > 0 modes
+!       phase = cmplx( 1.0, 0.0 )
+!       do m = 1, max_mode
+!          phase = phase * phase0
+!          ph_r = 2.0 * real(phase)
+!          ph_i = 2.0 * aimag(phase)
+
+!          do j = 0, 1
+!             apr(i) = apr(i) + ( ar_re(m)%f1(1,nn+j) * ph_r - ar_im(m)%f1(1,nn+j) * ph_i ) * wt(j)
+!             api(i) = api(i) + ( ai_re(m)%f1(1,nn+j) * ph_r - ai_im(m)%f1(1,nn+j) * ph_i ) * wt(j)
+!             apr_grad(:,i) = apr_grad(:,i) + ( ar_grad_re(m)%f1(:,nn+j) * ph_r - ar_grad_im(m)%f1(:,nn+j) * ph_i ) * wt(j)
+!             api_grad(:,i) = api_grad(:,i) + ( ai_grad_re(m)%f1(:,nn+j) * ph_r - ai_grad_im(m)%f1(:,nn+j) * ph_i ) * wt(j)
+!          enddo
+!       enddo
+
+!       ! transform from cylindrical geometry to Cartesian geometry
+!       if ( geom == p_cartesian ) then
+!         ! ph_r, ph_i are temporary variables here
+!         ph_r = apr_grad(1,i) * cc - apr_grad(2,i) * ss
+!         ph_i = apr_grad(1,i) * ss + apr_grad(2,i) * cc
+!         apr_grad(1,i) = ph_r
+!         apr_grad(2,i) = ph_i
+
+!         ph_r = api_grad(1,i) * cc - api_grad(2,i) * ss
+!         ph_i = api_grad(1,i) * ss + api_grad(2,i) * cc
+!         api_grad(1,i) = ph_r
+!         api_grad(2,i) = ph_i
+!       endif
+
+!       pp = pp + 1
+!     enddo
+
+! end subroutine interp_laser_part2d
 
-  do ptrcur = 1, this%npp, p_cache_size
+subroutine push_u_std_part2d(this, ef, bf, dt_)
+    implicit none
+    
+    class(part2d), intent(inout) :: this
+    class(field), intent(in) :: ef, bf
+    real, intent(in), optional :: dt_
+  
+    select case(this%part_shape)
+    case(p_ps_linear)
+      call push_u_std_part2d_s1(this, ef, bf, dt_)
+    case(p_ps_quadratic)
+      call push_u_std_part2d_s2(this, ef, bf, dt_)
+    case default
+      call write_err('Not implemented yet')
+    end select
+    
+end subroutine push_u_std_part2d
 
-    ! check if last copy of table and set np
-    if( ptrcur + p_cache_size > this%npp ) then
-      np = this%npp - ptrcur + 1
-    else
-      np = p_cache_size
-    endif
+subroutine push_u_robust_part2d(this, ef, bf, dt_)
+    implicit none
+    
+    class(part2d), intent(inout) :: this
+    class(field), intent(in) :: ef, bf
+    real, intent(in), optional :: dt_
+  
+    select case(this%part_shape)
+    case(p_ps_linear)
+      call push_u_robust_part2d_s1(this, ef, bf, dt_)
+    case(p_ps_quadratic)
+      call push_u_robust_part2d_s2(this, ef, bf, dt_)
+    case default
+      call write_err('Not implemented yet')
+    end select
+    
+end subroutine push_u_robust_part2d
 
-    pp = ptrcur
-    do i = 1, np
-       ! get normalized position
-       pos(i) = sqrt( this%x(1, pp)**2 + this%x(2, pp)**2 ) * idr
-       phase0(i) = cmplx( this%x(1, pp), -this%x(2, pp) ) / pos(i) * idr
-       pp = pp + 1
-    enddo
+subroutine push_u_robust_pgc_part2d(this, ef, bf, af, dt_)
+  implicit none
+  
+  class(part2d), intent(inout) :: this
+  class(field), intent(in) :: ef, bf
+  class(field_laser), intent(in) :: af
+  real, intent(in), optional :: dt_
 
-    pp = ptrcur
-    do i = 1, np
-      nn = floor( pos(i) )
-      ! in-cell position
-      pos(i) = pos(i) - real(nn)
-      nn = nn - noff + 1
+  select case(this%part_shape)
+  case(p_ps_linear)
+    call push_u_robust_pgc_part2d_s1(this, ef, bf, af, dt_)
+  case(p_ps_quadratic)
+    call push_u_robust_pgc_part2d_s2(this, ef, bf, af, dt_)
+  case default
+    call write_err('Not implemented yet')
+  end select
+  
+end subroutine push_u_robust_pgc_part2d
 
-      call spline_linear( pos(i), wt )
+subroutine push_u_std_pgc_part2d(this, ef, bf, af, dt_)
 
-      phase = cmplx( 1.0, 0.0 ) * this%q(pp)
-      ! deposit m=0 mode
-      do j = 0, 1
-        q0( 1, nn+j ) = q0( 1, nn+j ) + wt(j) * real(phase)
-      enddo
+  implicit none
 
-      ! deposit m>0 mode
-      do mode = 1, max_mode
-        qr => q_re(mode)%get_f1()
-        qi => q_im(mode)%get_f1()
-        phase = phase * phase0(i)
+  class(part2d), intent(inout) :: this
+  class(field), intent(in) :: ef, bf
+  class(field_laser), intent(in) :: af
+  real, intent(in), optional :: dt_
+  
+  select case(this%part_shape)
+  case(p_ps_linear)
+    call push_u_std_pgc_part2d_s1(this, ef, bf, af, dt_)
+  case(p_ps_quadratic)
+    call push_u_std_pgc_part2d_s2(this, ef, bf, af, dt_)
+  case default
+    call write_err('Not implemented yet')
+  end select
+  
+end subroutine push_u_std_pgc_part2d
 
-        do j = 0, 1
-          qr( 1, nn+j ) = qr( 1, nn+j ) + wt(j) * real(phase)
-          qi( 1, nn+j ) = qi( 1, nn+j ) + wt(j) * aimag(phase)
-        enddo
-      enddo
-       pp = pp + 1
-    enddo
+subroutine push_x_part2d(this, dt_)
 
-  enddo
+  implicit none
 
-  if (noff == 0) then
+  class(part2d), intent(inout) :: this
+  real, intent(in), optional :: dt_
+  ! local data
+  character(len=18), save :: sname = 'push_x_part2d'
 
-    q0(1,0) = 0.0 ! guard cell is useless on axis
-    q0(1,1) = 8.0 * q0(1,1)
-    ! q0(1,1) = q0(1,1) * this%deposit_ax_corr
-    do j = 2, nrp + 1
-      ir = 1.0 / ( j + noff - 1 )
-      q0(1,j) = q0(1,j) * ir
-    enddo
+  integer :: i, np
+  real :: gam, dtc, dt
+  integer(kind=LG) :: ptrcur, pp
 
-    do i = 1, max_mode
-      qr => q_re(i)%get_f1()
-      qi => q_im(i)%get_f1()
-      qr(1,0) = 0.0
-      qi(1,0) = 0.0
-      qr(1,1) = 0.0
-      qi(1,1) = 0.0
-      do j = 2, nrp + 1
-        ir = 1.0 / ( j + noff - 1 )
-        qr(1,j) = qr(1,j) * ir
-        qi(1,j) = qi(1,j) * ir
-      enddo
-    enddo
+  call write_dbg(cls_name, sname, cls_level, 'starts')
+  call start_tprof( 'push 2D particles' )
 
-  else
+  dt = this%dt
+  if (present(dt_)) dt = dt_
 
-    do j = 0, nrp + 1
-      ir = 1.0 / ( j + noff - 1 )
-      q0(1,j) = q0(1,j) * ir
-    enddo
+  do ptrcur = 1, this%npp, p_cache_size
 
-    do i = 1, max_mode
-      qr => q_re(i)%get_f1()
-      qi => q_im(i)%get_f1()
-      do j = 0, nrp + 1
-        ir = 1.0 / ( j + noff - 1 )
-        qr(1,j) = qr(1,j) * ir
-        qi(1,j) = qi(1,j) * ir
-      enddo
+    ! check if last copy of table and set np
+    if( ptrcur + p_cache_size > this%npp ) then
+      np = this%npp - ptrcur + 1
+    else
+      np = p_cache_size
+    endif
+
+    pp = ptrcur
+    do i = 1, np
+      dtc = dt / (this%gamma(pp) - this%p(3, pp))
+      this%x(1, pp) = this%x(1, pp) + this%p(1, pp) * dtc
+      this%x(2, pp) = this%x(2, pp) + this%p(2, pp) * dtc
+      pp = pp + 1
     enddo
 
-  endif
+  enddo
 
-  call stop_tprof( 'deposit 2D particles' )
+  call stop_tprof( 'push 2D particles' )
   call write_dbg(cls_name, sname, cls_level, 'ends')
 
-end subroutine qdeposit_part2d
+end subroutine push_x_part2d
 
-subroutine deposit_chi_part2d( this, chi )
-! deposit the plasma susceptibility
+subroutine interp_psi_part2d(this, psi_re, psi_im, max_mode)
 
   implicit none
-  class(part2d), intent(in) :: this
-  class(field), intent(inout) :: chi
-  ! local data
-  type(ufield), dimension(:), pointer :: chi_re => null(), chi_im => null()
-  complex(kind=DB) :: phase, phase0
-  real, dimension(0:1) :: wt ! interpolation weight
-  real :: pos_norm ! normalized position
-  real :: idr, ir
-  integer(kind=LG) :: ptrcur, pp
-  integer :: i, j, nn, noff, nrp, np, mode, max_mode
+  class(part2d), intent(inout) :: this
+  type(ufield), dimension(:), pointer, intent(in) :: psi_re, psi_im
+  integer, intent(in) :: max_mode
 
-  character(len=18), save :: sname = 'deposit_chi_part2d'
+  ! real, dimension(:, :), pointer :: psi_re_ptr, psi_im_ptr
+  integer :: noff, i, np
+  integer(kind=LG) :: pp, ptrcur
+  ! real :: pos, idr, ph_r, ph_i, cc, ss
+  ! real, dimension(0:1) :: wt
+  ! complex(kind=DB) :: phase0, phase
 
-  call write_dbg(cls_name, sname, cls_level, 'starts')
-  call start_tprof( 'deposit 2D particles' )
+  real, dimension(:, :), allocatable :: weight
+  real, dimension(p_cache_size) :: pcos, psin, psip
+  integer, dimension(p_cache_size) :: idx
 
-  chi_re => chi%get_rf_re()
-  chi_im => chi%get_rf_im()
-  idr      = 1.0 / this%dr
-  max_mode = chi%get_max_mode()
-  noff     = chi_re(0)%get_noff(1)
-  nrp      = chi_re(0)%get_ndp(1)
+  ! idr = 1.0 / this%dr
+  noff = psi_re(0)%get_noff(1)
+  ! psi_re_ptr => psi_re(0)%get_f1()
+  select case(this%part_shape)
+  case(p_ps_linear)
+    allocate(weight(0:1, p_cache_size))
+  case(p_ps_quadratic)
+    allocate(weight(-1:1, p_cache_size))
+case default
+    call write_err('Not implemented yet')
+end select
 
   do ptrcur = 1, this%npp, p_cache_size
 
@@ -394,611 +694,498 @@ subroutine deposit_chi_part2d( this, chi )
       np = p_cache_size
     endif
 
+    call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, idx, pcos, psin)
+    call interp_field(psi_re, psi_im, max_mode, weight, idx, pcos, psin, np, psip)
+
     pp = ptrcur
     do i = 1, np
+      this%psi(pp) = psip(i)
+    enddo
 
-      ! get normalized position
-      pos_norm = sqrt( this%x(1, pp)**2 + this%x(2, pp)**2 ) * idr
-      phase0 = cmplx( this%x(1, pp), -this%x(2, pp) ) / pos_norm * idr
+  enddo
+  
+end subroutine interp_psi_part2d
 
-      ! calculate in-cell position
-      nn = floor( pos_norm )
-      pos_norm = pos_norm - real(nn)
-      nn = nn - noff + 1
+subroutine update_bound_part2d( this )
 
-      call spline_linear( pos_norm, wt )
+   implicit none
 
-      ! phase = -1.0 * cmplx( 1.0, 0.0 ) * this%qbm * this%q(pp) / this%psi(pp)
-      phase = -1.0 * cmplx( 1.0, 0.0 ) * this%qbm * this%q(pp) / (1.0 - this%qbm * this%psi(pp))
-      ! deposit m = 0 mode
-      do j = 0, 1
-        chi_re(0)%f1(1,nn+j) = chi_re(0)%f1(1,nn+j) + wt(j) * real(phase)
-      enddo
+   class(part2d), intent(inout) :: this
+   ! local
+   integer(kind=LG) :: i
+   real :: pos
+   character(len=32), save :: sname = "update_bound_part2d"
 
-      ! deposit m > 0 modes
-      do mode = 1, max_mode
-        phase = phase * phase0
-        do j = 0, 1
-          chi_re(mode)%f1(1,nn+j) = chi_re(mode)%f1(1,nn+j) + wt(j) * real(phase)
-          chi_im(mode)%f1(1,nn+j) = chi_im(mode)%f1(1,nn+j) + wt(j) * aimag(phase)
-        enddo
-      enddo
+   call write_dbg(cls_name, sname, cls_level, 'starts')
 
-      pp = pp + 1
-    enddo
+   if ( this%npp == 0 ) return
 
-  enddo
+   call start_tprof( 'push 2D particles' )
 
-  if ( id_proc_loc() == 0 ) then
+   i = 1
 
-    chi_re(0)%f1(1,0) = 0.0 ! guard cell is useless on axis
-    ! chi_re(0)%f1(1,1) = 8.0 * chi_re(0)%f1(1,1) ! is this correct???
-    chi_re(0)%f1(1,1) = chi_re(0)%f1(1,1) * this%deposit_ax_corr
-    do j = 2, nrp + 1
-      ir = 1.0 / ( j + noff - 1 )
-      chi_re(0)%f1(1,j) = chi_re(0)%f1(1,j) * ir
-    enddo
+   do while ( i < this%npp )
 
-    do mode = 1, max_mode
-      ! guard cell is useless on axis
-      chi_re(mode)%f1(1,0) = 0.0
-      chi_im(mode)%f1(1,0) = 0.0
-      ! on-axis values are all zeros
-      chi_re(mode)%f1(1,1) = 0.0
-      chi_im(mode)%f1(1,1) = 0.0
-      do j = 2, nrp + 1
-        ir = 1.0 / ( j + noff - 1 )
-        chi_re(mode)%f1(1,j) = chi_re(mode)%f1(1,j) * ir
-        chi_im(mode)%f1(1,j) = chi_im(mode)%f1(1,j) * ir
-      enddo
-    enddo
+      pos = sqrt( this%x(1,i)**2 + this%x(2,i)**2 )
 
-  else
+      ! check if particle goes out of the physical edge
+      if ( pos >= this%edge ) then
+         this%x(:,i)   = this%x(:, this%npp)
+         this%p(:,i)   = this%p(:, this%npp)
+         this%gamma(i) = this%gamma(this%npp)
+         this%psi(i)   = this%psi(this%npp)
+         this%q(i)     = this%q(this%npp)
+         this%npp = this%npp - 1
+         cycle
+      endif
 
-    do j = 0, nrp + 1
-      ir = 1.0 / ( j + noff - 1 )
-      chi_re(0)%f1(1,j) = chi_re(0)%f1(1,j) * ir
-    enddo
+      i = i + 1
+   enddo
 
-    do mode = 1, max_mode
-      do j = 0, nrp + 1
-        ir = 1.0 / ( j + noff - 1 )
-        chi_re(mode)%f1(1,j) = chi_re(mode)%f1(1,j) * ir
-        chi_im(mode)%f1(1,j) = chi_im(mode)%f1(1,j) * ir
-      enddo
-    enddo
+   ! deal with the last particle
+   pos = sqrt( this%x(1,this%npp)**2 + this%x(2,this%npp)**2 )
 
-  endif
+   if ( pos >= this%edge ) then
+      this%npp = this%npp - 1
+   endif
 
-  call stop_tprof( 'deposit 2D particles' )
-  call write_dbg(cls_name, sname, cls_level, 'ends')
+   call stop_tprof( 'push 2D particles' )
+   call write_dbg(cls_name, sname, cls_level, 'ends')
 
-end subroutine deposit_chi_part2d
+end subroutine update_bound_part2d
 
-subroutine amjdeposit_std_part2d(this, ef, bf, cu, amu, dcu, dt_)
-! deposit the current, acceleration and momentum flux
+subroutine pipesend_part2d(this, tag, id)
 
   implicit none
 
   class(part2d), intent(inout) :: this
-  class(field), intent(in) :: cu, amu, dcu
-  class(field), intent(in) :: ef, bf
-  real, intent(in), optional :: dt_
-  ! local data
-  character(len=32), save :: sname = 'amjdeposit_std_part2d'
-  type(ufield), dimension(:), pointer :: ef_re => null(), ef_im => null()
-  type(ufield), dimension(:), pointer :: bf_re => null(), bf_im => null()
-  type(ufield), dimension(:), pointer :: cu_re => null(), cu_im => null()
-  type(ufield), dimension(:), pointer :: dcu_re => null(), dcu_im => null()
-  type(ufield), dimension(:), pointer :: amu_re => null(), amu_im => null()
-
-  real, dimension(:,:), pointer :: cu0 => null(), dcu0 => null(), amu0 => null()
-  real, dimension(:,:), pointer :: cur => null(), dcur => null(), amur => null()
-  real, dimension(:,:), pointer :: cui => null(), dcui => null(), amui => null()
+  integer, intent(in) :: tag
+  integer, intent(inout) :: id
 
-  integer(kind=LG) :: ptrcur, pp
-  integer :: i, j, noff, nrp, np, mode, max_mode
-  integer, dimension(p_cache_size) :: ix
-  real, dimension(p_p_dim, p_cache_size) :: bp, ep, wp, u0, u
-  real, dimension(0:1, p_cache_size) :: weight
-  real, dimension(p_cache_size) :: pcos, psin
-  real, dimension(p_p_dim) :: du, u2, utmp
-  real :: qtmh, qtmh1, qtmh2, idt, gam, ostq, ipsi, dpsi, w, ir, dt
-  complex(kind=DB) :: phase, phase0
+  ! local data
+  character(len=18), save :: sname = 'pipesend_part2d'
+  integer :: des, ierr, i, stride
 
   call write_dbg(cls_name, sname, cls_level, 'starts')
-  call start_tprof( 'deposit 2D particles' )
-
-  ef_re  => ef%get_rf_re();  ef_im  => ef%get_rf_im()
-  bf_re  => bf%get_rf_re();  bf_im  => bf%get_rf_im()
-  cu_re  => cu%get_rf_re();  cu_im  => cu%get_rf_im()
-  dcu_re => dcu%get_rf_re(); dcu_im => dcu%get_rf_im()
-  amu_re => amu%get_rf_re(); amu_im => amu%get_rf_im()
-
-  dt = this%dt
-  if (present(dt_)) dt = dt_
-
-  idt = 1.0 / dt
-  qtmh = 0.5 * this%qbm * dt
-  max_mode = ef%get_max_mode()
-
-  noff = cu_re(0)%get_noff(1)
-  nrp  = cu_re(0)%get_ndp(1)
-
-  cu0  => cu_re(0)%get_f1()
-  dcu0 => dcu_re(0)%get_f1()
-  amu0 => amu_re(0)%get_f1()
-
-  do ptrcur = 1, this%npp, p_cache_size
-
-    ! check if last copy of table and set np
-    if( ptrcur + p_cache_size > this%npp ) then
-      np = this%npp - ptrcur + 1
-    else
-      np = p_cache_size
-    endif
 
-    ! interpolate fields to particles
-    call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, ix, pcos, psin)
-    call interp_field(ef_re, ef_im, max_mode, weight, ix, pcos, psin, np, ep)
-    call interp_field(bf_re, bf_im, max_mode, weight, ix, pcos, psin, np, bp)
-
-    pp = ptrcur
-    do i = 1, np
+  des = id_proc() + num_procs_loc()
 
-      ! calculate wake field
-      wp(1, i) = ep(1, i) - bp(2, i)
-      wp(2, i) = ep(2, i) + bp(1, i)
-      wp(3, i) = ep(3, i)
+  if ( des >= num_procs() ) then
+    id = MPI_REQUEST_NULL
+    call write_dbg(cls_name, sname, cls_level, 'ends')
+    return
+  endif
 
-      ! transform momentum from Cartesian to cylindrical coordinates
-      u0(1, i) = this%p(1, pp) * pcos(i) + this%p(2, pp) * psin(i)
-      u0(2, i) = this%p(2, pp) * pcos(i) - this%p(1, pp) * psin(i)
-      u0(3, i) = this%p(3, pp)
+  do i = 1, this%npp
 
-      ! TODO: There is a strict time-centered scheme and it is left for future work.
+    stride = (i-1) * this%part_dim
 
-      ! normalize the E and B fields
-      gam = sqrt(1.0 + u0(1, i)**2 + u0(2, i)**2 + u0(3, i)**2)
-      qtmh2 = qtmh / (1.0 - this%qbm * this%psi(pp))
-      qtmh1 = qtmh2 * gam
-      bp(:, i) = bp(:, i) * qtmh2
+    this%pbuf( 1 + stride ) = this%x(1,i)
+    this%pbuf( 2 + stride ) = this%x(2,i)
+    this%pbuf( 3 + stride ) = this%p(1,i)
+    this%pbuf( 4 + stride ) = this%p(2,i)
+    this%pbuf( 5 + stride ) = this%p(3,i)
+    this%pbuf( 6 + stride ) = this%gamma(i)
+    this%pbuf( 7 + stride ) = this%psi(i)
+    this%pbuf( 8 + stride ) = this%q(i)
 
-      ! half electric acceleration
-      utmp(:) = u0(:, i) + ep(:, i) * qtmh1
+  enddo
 
-      ! magnetic rotation
-      u(1, i) = utmp(1) + utmp(2) * bp(3, i) - utmp(3) * bp(2, i)
-      u(2, i) = utmp(2) + utmp(3) * bp(1, i) - utmp(1) * bp(3, i)
-      u(3, i) = utmp(3) + utmp(1) * bp(2, i) - utmp(2) * bp(1, i)
+  ! NOTE: npp*xdim might be larger than MAX_INT32
+  call mpi_isend( this%pbuf, int(this%npp * this%part_dim), p_dtype_real, des, tag, &
+    comm_world(), id, ierr )
 
-      ostq = 2.0 / (1.0 + bp(1, i)**2 + bp(2, i)**2 + bp(3, i)**2)
-      bp(:,i) = bp(:,i) * ostq
+  ! check for errors
+  if (ierr /= 0) then
+    call write_err('MPI_ISEND failed')
+  endif
 
-      utmp(1) = utmp(1) + u(2, i) * bp(3, i) - u(3, i) * bp(2, i)
-      utmp(2) = utmp(2) + u(3, i) * bp(1, i) - u(1, i) * bp(3, i)
-      utmp(3) = utmp(3) + u(1, i) * bp(2, i) - u(2, i) * bp(1, i)
+  call write_dbg(cls_name, sname, cls_level, 'ends')
 
-      ! half electric acceleration
-      gam = sqrt(1.0 + utmp(1)**2 + utmp(2)**2 + utmp(3)**2)
-      qtmh1 = qtmh2 * gam
-      u(:, i) = utmp(:) + ep(:, i) * qtmh1
+end subroutine pipesend_part2d
 
-      pp = pp + 1
-    enddo
+subroutine piperecv_part2d(this, tag)
 
-    ! calculate and store time-centered values
-    ! deposit momentum flux, acceleration density, and current density
-    pp = ptrcur
-    do i = 1, np
+  implicit none
 
-      du(1) = idt * ( u(1,i) - u0(1,i) )
-      du(2) = idt * ( u(2,i) - u0(2,i) )
-      
-      u(:, i)  = 0.5 * ( u(:, i) + u0(:, i) )
-      this%gamma(pp) = sqrt(1.0 + u(1, i)**2 + u(2, i)**2 + u(3, i)**2)
+  class(part2d), intent(inout) :: this
+  integer, intent(in) :: tag
+  ! local data
+  character(len=18), save :: sname = 'piperecv_part2d'
+  integer, dimension(MPI_STATUS_SIZE) :: stat
+  integer :: recv_cnt, src, ierr, i, stride
+  real :: ratio
 
-      ipsi = 1.0 / (1.0 - this%qbm * this%psi(pp))
-      dpsi = this%qbm * (wp(3, i) - (wp(1, i) * u(1, i) + wp(2, i) * u(2, i)) * ipsi)
+  call write_dbg(cls_name, sname, cls_level, 'starts')
 
-      du(1) = du(1) + u(1, i) * dpsi * ipsi
-      du(2) = du(2) + u(2, i) * dpsi * ipsi
+  if ( .not. allocated(recv_buf) ) then
+    allocate( recv_buf( this%part_dim * recv_buf_size ) )
+  endif
 
-      u2(1) = u(1, i) * u(1, i) * ipsi
-      u2(2) = u(1, i) * u(2, i) * ipsi
-      u2(3) = u(2, i) * u(2, i) * ipsi
+  src = id_proc() - num_procs_loc()
 
-      phase0 = cmplx(pcos(i), -psin(i))
-      phase  = cmplx(1.0, 0.0) * this%q(pp) * ipsi
+  if (src < 0) then
+    call write_dbg(cls_name, sname, cls_level, 'ends')
+    return
+  endif
 
-      ! deposit m = 0 mode
-      do j = 0, 1
-        w = weight(j, i) * real(phase)
-        cu0(1:3, ix(i) + j)  = cu0(1:3, ix(i) + j)  + w * u(1:3, i)
-        dcu0(1:2, ix(i) + j) = dcu0(1:2, ix(i) + j) + w * du(1:2)
-        amu0(1:3, ix(i) + j) = amu0(1:3, ix(i) + j) + w * u2(1:3)
-      enddo
+  ! probe for incoming message
+  call mpi_probe( src, tag, comm_world(), stat, ierr )
+  call mpi_get_count( stat, p_dtype_real, recv_cnt, ierr )
+  recv_cnt = recv_cnt / this%part_dim
 
-      ! deposit m > 0 mode
-      do mode = 1, max_mode
+  ! check if the receiving buffer needs to be reallocated
+  if ( recv_cnt > recv_buf_size ) then
+    call write_stdout( '[process ' // num2str(id_proc()) // ']: Resizing 2D &
+          &particle pipeline receiving buffer!', only_root = .false. )
+    deallocate( recv_buf )
+    recv_buf_size = int( recv_cnt * p_buf_incr )
+    allocate( recv_buf( recv_buf_size * this%part_dim ) )
+  endif
 
-        cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-        dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-        amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+  ! NOTE: npp*xdim might be larger than MAX_INT32
+  call mpi_recv( recv_buf, this%part_dim * recv_buf_size, p_dtype_real, &
+    src, tag, comm_world(), MPI_STATUS_IGNORE, ierr )
 
-        phase = phase * phase0
+  ! check if the particle buffer needs to be reallocated
+  ratio = real( recv_cnt ) / this%npmax
+  if ( ratio > 1.0 ) then
+    call write_stdout( '[process ' // num2str(id_proc()) // ']: Resizing 2D &
+          &particle buffer!', only_root = .false. )
+    call this%realloc( ratio = ratio * p_buf_incr )
+  endif
 
-        do j = 0, 1
-          w = weight(j,i) * real(phase)
-          cur( 1:3, ix(i)+j )  = cur( 1:3, ix(i)+j )  + w * u(1:3,i)
-          dcur( 1:2, ix(i)+j ) = dcur( 1:2, ix(i)+j ) + w * du(1:2)
-          amur( 1:3, ix(i)+j ) = amur( 1:3, ix(i)+j ) + w * u2(1:3)
+  this%npp = recv_cnt
 
-          w = weight(j,i) * aimag(phase)
-          cui( 1:3, ix(i)+j )  = cui( 1:3, ix(i)+j )  + w * u(1:3,i)
-          dcui( 1:2, ix(i)+j ) = dcui( 1:2, ix(i)+j ) + w * du(1:2)
-          amui( 1:3, ix(i)+j ) = amui( 1:3, ix(i)+j ) + w * u2(1:3)
-        enddo
+  do i = 1, this%npp
 
-      enddo
+    stride = (i-1) * this%part_dim
 
-      pp = pp + 1
-    enddo
+    this%x(1,i)   = recv_buf( 1 + stride )
+    this%x(2,i)   = recv_buf( 2 + stride )
+    this%p(1,i)   = recv_buf( 3 + stride )
+    this%p(2,i)   = recv_buf( 4 + stride )
+    this%p(3,i)   = recv_buf( 5 + stride )
+    this%gamma(i) = recv_buf( 6 + stride )
+    this%psi(i)   = recv_buf( 7 + stride )
+    this%q(i)     = recv_buf( 8 + stride )
 
   enddo
 
-  if ( noff == 0 ) then
-
-    ! guard cells on the axis are useless
-    cu0(1:3,0)  = 0.0
-    dcu0(1:2,0) = 0.0
-    amu0(1:3,0) = 0.0
-
-    cu0(1:2,1)  = 0.0; cu0(3,1) = 8.0 * cu0(3,1)
-    dcu0(1:2,1) = 0.0
-    amu0(1:3,1) = 0.0
-
-    do mode = 1, max_mode
-
-      cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-      dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-      amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
-
-      ! guard cells on the axis are useless
-      cur(1:3,0)  = 0.0; cui(1:3,0)  = 0.0
-      dcur(1:2,0) = 0.0; dcui(1:2,0) = 0.0
-      amur(1:3,0) = 0.0; amui(1:3,0) = 0.0
-       
-      if ( mode == 1 ) then
-        cur(1:2,1)  = 8.0 * cur(1:2,1); cur(3,1) = 0.0
-        dcur(1:2,1) = 8.0 * dcur(1:2,1)
-        amur(1:3,1) = 0.0
-        cui(1:2,1)  = 8.0 * cui(1:2,1); cui(3,1) = 0.0
-        dcui(1:2,1) = 8.0 * dcui(1:2,1)
-        amui(1:3,1) = 0.0
-      elseif ( mode == 2 ) then
-        cur(1:3,1)  = 0.0
-        dcur(1:2,1) = 0.0
-        amur(1:3,1) = 8.0 * amur(1:3,1)
-        cui(1:3,1)  = 0.0
-        dcui(1:2,1) = 0.0
-        amui(1:3,1) = 8.0 * amui(1:3,1)
-      else
-        cur(1:3,1)  = 0.0
-        dcur(1:2,1) = 0.0
-        amur(1:3,1) = 0.0
-        cui(1:3,1)  = 0.0
-        dcui(1:2,1) = 0.0
-        amui(1:3,1) = 0.0
-       endif
-    enddo
-
-    do j = 2, nrp + 1
-      ir = 1.0 / ( j + noff - 1 )
-      cu0(1:3,j)  = cu0(1:3,j)  * ir
-      dcu0(1:2,j) = dcu0(1:2,j) * ir
-      amu0(1:3,j) = amu0(1:3,j) * ir
-    enddo
-
-    do mode = 1, max_mode
-
-      cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-      dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-      amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
-
-      do j = 2, nrp + 1
-        ir = 1.0 / ( j + noff - 1 )
-        cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
-        dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
-        amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
-      enddo
-    enddo
-
-  else
-
-    do j = 0, nrp + 1
-       ir = 1.0 / ( j + noff - 1 )
-       cu0(1:3,j)  = cu0(1:3,j)  * ir
-       dcu0(1:2,j) = dcu0(1:2,j) * ir
-       amu0(1:3,j) = amu0(1:3,j) * ir
-    enddo
-
-    do mode = 1, max_mode
-      cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-      dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-      amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
-
-      do j = 0, nrp+1
-        ir = 1.0 / ( j + noff - 1 )
-        cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
-        dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
-        amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
-      enddo
-    enddo
-
+  ! check for errors
+  if (ierr /= 0) then
+    call write_err('MPI failed')
   endif
 
-  call stop_tprof( 'deposit 2D particles' )
   call write_dbg(cls_name, sname, cls_level, 'ends')
 
-end subroutine amjdeposit_std_part2d
+end subroutine piperecv_part2d
 
-subroutine amjdeposit_robust_part2d(this, ef, bf, cu, amu, dcu, dt_)
-! deposit the current, acceleration and momentum flux
+subroutine writehdf5_part2d(this,file)
 
   implicit none
 
   class(part2d), intent(inout) :: this
-  class(field), intent(in) :: cu, amu, dcu
-  class(field), intent(in) :: ef, bf
-  real, intent(in), optional :: dt_
+  class(hdf5file), intent(in) :: file
   ! local data
-  character(len=32), save :: sname = 'amjdeposit_robust_part2d'
-  type(ufield), dimension(:), pointer :: ef_re => null(), ef_im => null()
-  type(ufield), dimension(:), pointer :: bf_re => null(), bf_im => null()
-  type(ufield), dimension(:), pointer :: cu_re => null(), cu_im => null()
-  type(ufield), dimension(:), pointer :: dcu_re => null(), dcu_im => null()
-  type(ufield), dimension(:), pointer :: amu_re => null(), amu_im => null()
-
-  real, dimension(:, :), pointer :: cu0 => null(), dcu0 => null(), amu0 => null()
-  real, dimension(:, :), pointer :: cur => null(), dcur => null(), amur => null()
-  real, dimension(:, :), pointer :: cui => null(), dcui => null(), amui => null()
-
-  integer(kind=LG) :: ptrcur, pp
-  integer :: i, j, noff, nrp, np, mode, max_mode
-  integer, dimension(p_cache_size) :: ix
-  real, dimension(p_p_dim, p_cache_size) :: bp, ep, wp, u0, u, utmp
-  real, dimension(0:1, p_cache_size) :: weight
-  real, dimension(p_cache_size) :: pcos, psin
-  real, dimension(p_p_dim) :: du, u2
-  real :: qtmh, qtmh1, qtmh2, idt, gam, ostq, ipsi, dpsi, w, ir, dt
-  complex(kind=DB) :: phase, phase0
+  character(len=18), save :: sname = 'writehdf5_part2d'
+  integer :: ierr
 
   call write_dbg(cls_name, sname, cls_level, 'starts')
-  call start_tprof( 'deposit 2D particles' )
-
-  ef_re  => ef%get_rf_re();  ef_im  => ef%get_rf_im()
-  bf_re  => bf%get_rf_re();  bf_im  => bf%get_rf_im()
-  cu_re  => cu%get_rf_re();  cu_im  => cu%get_rf_im()
-  dcu_re => dcu%get_rf_re(); dcu_im => dcu%get_rf_im()
-  amu_re => amu%get_rf_re(); amu_im => amu%get_rf_im()
-
-  dt = this%dt
-  if (present(dt_)) dt = dt_
+  call pwpart( file, this%x, this%p, this%q, this%npp, 1, ierr )
+  call write_dbg(cls_name, sname, cls_level, 'ends')
 
-  idt = 1.0 / dt
-  qtmh = 0.5 * this%qbm * dt
-  max_mode = ef%get_max_mode()
+end subroutine writehdf5_part2d
 
-  noff = cu_re(0)%get_noff(1)
-  nrp  = cu_re(0)%get_ndp(1)
+subroutine sort_part2d( this, nrp, noff )
 
-  cu0  => cu_re(0)%get_f1()
-  dcu0 => dcu_re(0)%get_f1()
-  amu0 => amu_re(0)%get_f1()
+  implicit none
+  class(part2d), intent(inout) :: this
+  integer, intent(in) :: nrp, noff
 
-  do ptrcur = 1, this%npp, p_cache_size
+  ! local data
+  integer :: i
+  integer, dimension(:), allocatable :: ix
+  real :: idr, pos
+  character(len=18), save :: sname = 'sort_part2d'
 
-    ! check if last copy of table and set np
-    if( ptrcur + p_cache_size > this%npp ) then
-      np = this%npp - ptrcur + 1
-    else
-      np = p_cache_size
-    endif
+  call write_dbg(cls_name, sname, cls_level, 'starts')
+  call start_tprof( 'sort 2D particles' )
 
-    ! interpolate fields to particles
-    call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, ix, pcos, psin)
-    call interp_field(ef_re, ef_im, max_mode, weight, ix, pcos, psin, np, ep)
-    call interp_field(bf_re, bf_im, max_mode, weight, ix, pcos, psin, np, bp)
+  if ( .not. allocated(sort_idx) ) allocate( sort_idx(this%npmax) )
+  if ( this%npmax > size(sort_idx) ) then
+    deallocate(sort_idx)
+    allocate( sort_idx(this%npmax) )
+  endif
 
-    ! calculate wake field
-    pp = ptrcur
-    do i = 1, np
-      wp(1, i) = ep(1, i) - bp(2, i)
-      wp(2, i) = ep(2, i) + bp(1, i)
-      wp(3, i) = ep(3, i)
+  allocate( ix(this%npp) )
+  ix = 0
 
-      ! transform momentum from Cartesian to cylindrical coordinates
-      u0(1, i) = this%p(1, pp) * pcos(i) + this%p(2, pp) * psin(i)
-      u0(2, i) = this%p(2, pp) * pcos(i) - this%p(1, pp) * psin(i)
-      u0(3, i) = this%p(3, pp)
+  idr = 1.0 / this%dr
+  ! calculate the grid index of particles
+  do i = 1, this%npp
+    pos = sqrt( this%x(1,i)**2 + this%x(2,i)**2 ) * idr
+    ix(i) = floor(pos) - noff + 1
+  enddo
 
-      ! half electric acceleration
-      gam = sqrt(1.0 + u0(1, i)**2 + u0(2, i)**2 + u0(3, i)**2)
-      qtmh1 = qtmh * gam / (gam - u0(3, i))
-      ep(:, i) = ep(:, i) * qtmh1
-      utmp(:, i) = u0(:, i) + ep(:, i)
+  ! generate the sorted indices
+  call generate_sort_idx_1d( ix, sort_idx, int(this%npp, kind=4), nrp )
 
-      ! scale magnetic field
-      gam = sqrt(1.0 + utmp(1, i)**2 + utmp(2, i)**2 + utmp(3, i)**2)
-      qtmh2 = qtmh / (gam - utmp(3, i))
-      bp(:, i) = bp(:, i) * qtmh2
+  ! rearrange the particle position
+  this%pbuf(1:this%npp) = this%x( 1, 1:this%npp )
+  do i = 1, this%npp
+    this%x( 1, sort_idx(i) ) = this%pbuf(i)
+  enddo
+  this%pbuf(1:this%npp) = this%x( 2, 1:this%npp )
+  do i = 1, this%npp
+    this%x( 2, sort_idx(i) ) = this%pbuf(i)
+  enddo
 
-      ! magnetic rotation
-      u(1, i) = utmp(1, i) + utmp(2, i) * bp(3, i) - utmp(3, i) * bp(2, i)
-      u(2, i) = utmp(2, i) + utmp(3, i) * bp(1, i) - utmp(1, i) * bp(3, i)
-      u(3, i) = utmp(3, i) + utmp(1, i) * bp(2, i) - utmp(2, i) * bp(1, i)
+  ! rearrange the particle momentum
+  this%pbuf(1:this%npp) = this%p( 1, 1:this%npp )
+  do i = 1, this%npp
+    this%p( 1, sort_idx(i) ) = this%pbuf(i)
+  enddo
+  this%pbuf(1:this%npp) = this%p( 2, 1:this%npp )
+  do i = 1, this%npp
+    this%p( 2, sort_idx(i) ) = this%pbuf(i)
+  enddo
+  this%pbuf(1:this%npp) = this%p( 3, 1:this%npp )
+  do i = 1, this%npp
+    this%p( 3, sort_idx(i) ) = this%pbuf(i)
+  enddo
 
-      ostq = 2.0 / (1.0 + bp(1, i)**2 + bp(2, i)**2 + bp(3, i)**2)
-      bp(:, i) = bp(:, i) * ostq
+  ! rearrange the particle gamma
+  this%pbuf(1:this%npp) = this%gamma( 1:this%npp )
+  do i = 1, this%npp
+    this%gamma( sort_idx(i) ) = this%pbuf(i)
+  enddo
 
-      utmp(1, i) = utmp(1, i) + u(2, i) * bp(3, i) - u(3, i) * bp(2, i)
-      utmp(2, i) = utmp(2, i) + u(3, i) * bp(1, i) - u(1, i) * bp(3, i)
-      utmp(3, i) = utmp(3, i) + u(1, i) * bp(2, i) - u(2, i) * bp(1, i)
+  ! rearrange the particle charge
+  this%pbuf(1:this%npp) = this%q( 1:this%npp )
+  do i = 1, this%npp
+    this%q( sort_idx(i) ) = this%pbuf(i)
+  enddo
 
-      ! half electric acceleration
-      u(:, i) = utmp(:, i) + ep(:, i)
+  ! rearrange the particle psi
+  this%pbuf(1:this%npp) = this%psi( 1:this%npp )
+  do i = 1, this%npp
+    this%psi( sort_idx(i) ) = this%pbuf(i)
+  enddo
 
-      pp = pp + 1
-    enddo
+  deallocate( ix )
 
-    ! calculate and store time-centered values
-    ! deposit momentum flux, acceleration density, and current density
-    pp = ptrcur
-    do i = 1, np
+  call stop_tprof( 'sort 2D particles' )
+  call write_dbg(cls_name, sname, cls_level, 'ends')
+    
+end subroutine sort_part2d
 
-      du(1) = idt * ( u(1,i) - u0(1,i) )
-      du(2) = idt * ( u(2,i) - u0(2,i) )
-      
-      u(:,i)  = 0.5 * ( u(:,i) + u0(:,i) )
-      this%gamma(pp) = sqrt( 1.0 + u(1,i)**2 + u(2,i)**2 + u(3,i)**2 )
-      ipsi = 1.0 / (this%gamma(pp) - u(3,i))
-      this%psi(pp) = (1.0 - 1.0 / ipsi) / this%qbm
+function get_deposit_ax_corr( ppc_r, part_shape ) result(res)
+    implicit none
+    integer, intent(in) :: ppc_r
+    real, dimension(1,2) :: res
+    integer, intent(in) :: part_shape
+    
+    res = 0.0
+    select case(part_shape)
+    case(p_ps_linear)
+        res(1,1) = (12.0 * ppc_r**2) / ( 1.0 + 2.0 * ppc_r**2 ) 
+    case(p_ps_quadratic)
+        if (mod(ppc_r,2)==0) then
+            res(1,1) = (384.0 * ppc_r**2) / (14.0 + 79.0 * ppc_r**2)
+            res(1,2) = (384.0 * ppc_r**2) / (2.0 + 385.0 * ppc_r**2)
+        else
+            res(1,1) = (384.0 * ppc_r**4) / (3.0 + 14.0 * ppc_r**2 + 79.0 * ppc_r**4)
+            res(1,2) = (384.0 * ppc_r**4) / (-3.0 + 2.0 * ppc_r**2 + 385.0 * ppc_r**4)
+        endif
+  case default
+      call write_err('Not implemented yet')
+  end select
+  end function get_deposit_ax_corr
+  
+#define __TEMPLATE__
+
+#define SPLINE spline_linear
+#define QDEPOSIT qdeposit_part2d_s1
+#define CHIDEPOSIT deposit_chi_part2d_s1
+! #define AX_COOR get_deposit_ax_corr_s1
+#define LINEAR
+! #define CORRNUM 1
+#define AMJ_STD amjdeposit_std_part2d_s1
+#define AMJ_ROB amjdeposit_robust_part2d_s1
+#define AMJ_STD_PGC amjdeposit_std_pgc_part2d_s1
+#define AMJ_ROB_PGC amjdeposit_robust_pgc_part2d_s1
+#define PUSH_STD push_u_std_part2d_s1
+#define PUSH_ROB push_u_robust_part2d_s1
+#define PUSH_ROB_PGC push_u_robust_pgc_part2d_s1
+#define PUSH_STD_PGC push_u_std_pgc_part2d_s1
+#define LP 0
+#define UP 1
+
+#include __FILE__
+
+#define SPLINE spline_quadratic
+#define QDEPOSIT qdeposit_part2d_s2
+#define CHIDEPOSIT deposit_chi_part2d_s2
+! #define AX_COOR get_deposit_ax_corr_s2
+#define QUADRATIC
+! #define CORRNUM 2
+#define AMJ_STD amjdeposit_std_part2d_s2
+#define AMJ_ROB amjdeposit_robust_part2d_s2
+#define AMJ_STD_PGC amjdeposit_std_pgc_part2d_s2
+#define AMJ_ROB_PGC amjdeposit_robust_pgc_part2d_s2
+#define PUSH_STD push_u_std_part2d_s2
+#define PUSH_ROB push_u_robust_part2d_s2
+#define PUSH_ROB_PGC push_u_robust_pgc_part2d_s2
+#define PUSH_STD_PGC push_u_std_pgc_part2d_s2
+#define LP -1
+#define UP 1
+
+#include __FILE__
+
+end module part2d_class
+
+#else
+    
+subroutine QDEPOSIT( this, q )
+! deposit the charge density (rho - Jz)
 
-      dpsi = this%qbm * (wp(3, i) - (wp(1, i) * u(1, i) + wp(2, i) * u(2, i)) * ipsi)
+  implicit none
 
-      du(1) = du(1) + u(1, i) * dpsi * ipsi
-      du(2) = du(2) + u(2, i) * dpsi * ipsi
+  class(part2d), intent(in) :: this
+  class(field), intent(in) :: q
+  ! local data
+  type(ufield), dimension(:), pointer :: q_re => null(), q_im => null()
+  complex(kind=DB) :: phase, phase0
+  real, dimension(:,:), pointer :: q0 => null(), qr => null(), qi => null()
+  real, dimension(LP:UP) :: wt ! interpolation weight
+  real :: pos_norm ! normalized position
+  real :: idr, ir
+  integer(kind=LG) :: ptrcur, pp
+  integer :: i, j, nn, noff, nrp, np, mode, max_mode
 
-      u2(1) = u(1, i) * u(1, i) * ipsi
-      u2(2) = u(1, i) * u(2, i) * ipsi
-      u2(3) = u(2, i) * u(2, i) * ipsi
+  character(len=18), save :: sname = 'qdeposit_part2d'
 
-      phase0 = cmplx( pcos(i), -psin(i) )
-      phase  = cmplx( 1.0, 0.0 ) * this%q(pp) * ipsi
+  call write_dbg(cls_name, sname, cls_level, 'starts')
+  call start_tprof( 'deposit 2D particles' )
 
-      ! deposit m = 0 mode
-      do j = 0, 1
-        w = weight(j,i) * real(phase)
-        cu0( 1:3, ix(i)+j )  = cu0( 1:3, ix(i)+j )  + w * u(1:3,i)
-        dcu0( 1:2, ix(i)+j ) = dcu0( 1:2, ix(i)+j ) + w * du(1:2)
-        amu0( 1:3, ix(i)+j ) = amu0( 1:3, ix(i)+j ) + w * u2(1:3)
-      enddo
+  idr = 1.0 / this%dr
 
-      ! deposit m > 0 mode
-      do mode = 1, max_mode
+  q_re => q%get_rf_re()
+  q_im => q%get_rf_im()
 
-        cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-        dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-        amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+  max_mode = q%get_max_mode()
 
-        phase = phase * phase0
+  noff = q_re(0)%get_noff(1)
+  nrp  = q_re(0)%get_ndp(1)
+  q0   => q_re(0)%get_f1()
 
-        do j = 0, 1
-          w = weight(j,i) * real(phase)
-          cur( 1:3, ix(i)+j )  = cur( 1:3, ix(i)+j )  + w * u(1:3,i)
-          dcur( 1:2, ix(i)+j ) = dcur( 1:2, ix(i)+j ) + w * du(1:2)
-          amur( 1:3, ix(i)+j ) = amur( 1:3, ix(i)+j ) + w * u2(1:3)
+  do ptrcur = 1, this%npp, p_cache_size
 
-          w = weight(j,i) * aimag(phase)
-          cui( 1:3, ix(i)+j )  = cui( 1:3, ix(i)+j )  + w * u(1:3,i)
-          dcui( 1:2, ix(i)+j ) = dcui( 1:2, ix(i)+j ) + w * du(1:2)
-          amui( 1:3, ix(i)+j ) = amui( 1:3, ix(i)+j ) + w * u2(1:3)
-        enddo
+    ! check if last copy of table and set np
+    if( ptrcur + p_cache_size > this%npp ) then
+      np = this%npp - ptrcur + 1
+    else
+      np = p_cache_size
+    endif
 
-      enddo
+    pp = ptrcur
+    do i = 1, np
+       ! get normalized position
+       pos_norm= sqrt( this%x(1, pp)**2 + this%x(2, pp)**2 ) * idr
+       phase0 = cmplx( this%x(1, pp), -this%x(2, pp) ) / pos_norm * idr
 
-      pp = pp + 1
-    enddo
+        nn = floor( pos_norm )
+        ! in-cell position
+        pos_norm = pos_norm - real(nn)
+        nn = nn - noff + 1
 
-  enddo
+#ifdef QUADRATIC
+        nn = nn + floor(pos_norm-0.5) + 1
+        pos_norm = pos_norm - floor(pos_norm-0.5) - 1
+#endif
 
-  if ( noff == 0 ) then
+        call SPLINE( pos_norm, wt )
 
-    ! guard cells on the axis are useless
-    cu0(1:3,0)  = 0.0
-    dcu0(1:2,0) = 0.0
-    amu0(1:3,0) = 0.0
+        phase = cmplx( 1.0, 0.0 ) * this%q(pp)
+        ! deposit m=0 mode
+        do j = LP, UP
+            q0( 1, nn+j ) = q0( 1, nn+j ) + wt(j) * real(phase)
+        enddo
+        
+        ! deposit m>0 mode
+        do mode = 1, max_mode
+            qr => q_re(mode)%get_f1()
+            qi => q_im(mode)%get_f1()
+            phase = phase * phase0
+    
+            do j = LP, UP
+            qr( 1, nn+j ) = qr( 1, nn+j ) + wt(j) * real(phase)
+            qi( 1, nn+j ) = qi( 1, nn+j ) + wt(j) * aimag(phase)
+            enddo
 
-    cu0(1:2,1)  = 0.0; cu0(3,1) = 8.0 * cu0(3,1)
-    dcu0(1:2,1) = 0.0
-    amu0(1:3,1) = 0.0
+        enddo
+        pp = pp + 1
+    enddo
 
-    do mode = 1, max_mode
+  enddo
 
-      cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-      dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-      amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
-
-      ! guard cells on the axis are useless
-      cur(1:3,0)  = 0.0; cui(1:3,0)  = 0.0
-      dcur(1:2,0) = 0.0; dcui(1:2,0) = 0.0
-      amur(1:3,0) = 0.0; amui(1:3,0) = 0.0
-       
-      if ( mode == 1 ) then
-        cur(1:2,1)  = 8.0 * cur(1:2,1); cur(3,1) = 0.0
-        dcur(1:2,1) = 8.0 * dcur(1:2,1)
-        amur(1:3,1) = 0.0
-        cui(1:2,1)  = 8.0 * cui(1:2,1); cui(3,1) = 0.0
-        dcui(1:2,1) = 8.0 * dcui(1:2,1)
-        amui(1:3,1) = 0.0
-      elseif ( mode == 2 ) then
-        cur(1:3,1)  = 0.0
-        dcur(1:2,1) = 0.0
-        amur(1:3,1) = 8.0 * amur(1:3,1)
-        cui(1:3,1)  = 0.0
-        dcui(1:2,1) = 0.0
-        amui(1:3,1) = 8.0 * amui(1:3,1)
-      else
-        cur(1:3,1)  = 0.0
-        dcur(1:2,1) = 0.0
-        amur(1:3,1) = 0.0
-        cui(1:3,1)  = 0.0
-        dcui(1:2,1) = 0.0
-        amui(1:3,1) = 0.0
-       endif
-    enddo
+  if ( id_proc_loc() == 0 ) then
+    
+! Correct the charge on axis by adding the value on the guard cell 0
+! TODO: the data exchanged between partitions should be 2!!
+#ifdef QUADRATIC
+    q0(1,1) = q0(1,1) + q0(1,0)
+#endif
 
+    q0(1,0) = 0.0 ! guard cell is useless on axis
+    q0(1,1) = 8.0 * q0(1,1)
+    ! q0(1,1) = q0(1,1) * this%deposit_ax_corr
     do j = 2, nrp + 1
       ir = 1.0 / ( j + noff - 1 )
-      cu0(1:3,j)  = cu0(1:3,j)  * ir
-      dcu0(1:2,j) = dcu0(1:2,j) * ir
-      amu0(1:3,j) = amu0(1:3,j) * ir
+      q0(1,j) = q0(1,j) * ir
     enddo
 
-    do mode = 1, max_mode
-
-      cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-      dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-      amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    do i = 1, max_mode
+      qr => q_re(i)%get_f1()
+      qi => q_im(i)%get_f1()
+      qr(1,0) = 0.0
+      qi(1,0) = 0.0
+      qr(1,1) = 0.0
+      qi(1,1) = 0.0
 
       do j = 2, nrp + 1
         ir = 1.0 / ( j + noff - 1 )
-        cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
-        dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
-        amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
+        qr(1,j) = qr(1,j) * ir
+        qi(1,j) = qi(1,j) * ir
       enddo
     enddo
 
   else
-
+! #ifdef QUADRATIC
+! ! Correct the charge on the boundary by adding the value on the guard cell nrp+2
+!     if (id_proc_loc() == num_procs_loc()-1) then
+!         q0(1, nrp) = q0(1, nrp) + q0(1, nrp+2)
+!         do i = 1, max_mode
+!             qr => q_re(i)%get_f1()
+!             qi => q_im(i)%get_f1()
+!             qr(1, nrp) = qr(1, nrp) + qr(1, nrp+2)
+!             qi(1, nrp) = qi(1, nrp) + qi(1, nrp+2)
+!         enddo
+!     endif
+! #endif
     do j = 0, nrp + 1
-       ir = 1.0 / ( j + noff - 1 )
-       cu0(1:3,j)  = cu0(1:3,j)  * ir
-       dcu0(1:2,j) = dcu0(1:2,j) * ir
-       amu0(1:3,j) = amu0(1:3,j) * ir
+      ir = 1.0 / ( j + noff - 1 )
+      q0(1,j) = q0(1,j) * ir
     enddo
 
-    do mode = 1, max_mode
-      cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-      dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-      amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
-
-      do j = 0, nrp+1
+    do i = 1, max_mode
+      qr => q_re(i)%get_f1()
+      qi => q_im(i)%get_f1()
+      do j = 0, nrp + 1
         ir = 1.0 / ( j + noff - 1 )
-        cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
-        dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
-        amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
+        qr(1,j) = qr(1,j) * ir
+        qi(1,j) = qi(1,j) * ir
       enddo
     enddo
 
@@ -1007,76 +1194,35 @@ subroutine amjdeposit_robust_part2d(this, ef, bf, cu, amu, dcu, dt_)
   call stop_tprof( 'deposit 2D particles' )
   call write_dbg(cls_name, sname, cls_level, 'ends')
 
-end subroutine amjdeposit_robust_part2d
+end subroutine QDEPOSIT
 
-subroutine amjdeposit_std_pgc_part2d(this, ef, bf, af, cu, amu, dcu, dt_)
-! deposit the current, acceleration and momentum flux
 
-  implicit none
+subroutine CHIDEPOSIT( this, chi )
+! deposit the plasma susceptibility
 
-  class(part2d), intent(inout) :: this
-  class(field), intent(in) :: cu, amu, dcu
-  class(field), intent(in) :: ef, bf
-  class(field_laser), intent(in) :: af
-  real, intent(in), optional :: dt_
+  implicit none
+  class(part2d), intent(in) :: this
+  class(field), intent(inout) :: chi
   ! local data
-  character(len=32), save :: sname = 'amjdeposit_std_pgc_part2d'
-  type(ufield), dimension(:), pointer :: ef_re => null(), ef_im => null()
-  type(ufield), dimension(:), pointer :: bf_re => null(), bf_im => null()
-  type(ufield), dimension(:), pointer :: cu_re => null(), cu_im => null()
-  type(ufield), dimension(:), pointer :: dcu_re => null(), dcu_im => null()
-  type(ufield), dimension(:), pointer :: amu_re => null(), amu_im => null()
-  type(ufield), dimension(:), pointer :: ar_re, ar_im, ai_re, ai_im
-  type(ufield), dimension(:), pointer :: ar_grad_re, ar_grad_im, ai_grad_re, ai_grad_im
-
-  real, dimension(:,:), pointer :: cu0 => null(), dcu0 => null(), amu0 => null()
-  real, dimension(:,:), pointer :: cur => null(), dcur => null(), amur => null()
-  real, dimension(:,:), pointer :: cui => null(), dcui => null(), amui => null()
-
-  integer(kind=LG) :: ptrcur, pp
-  integer :: i, j, noff, nrp, np, mode, max_mode
-  integer, dimension(p_cache_size) :: ix
-  real, dimension(p_p_dim, p_cache_size) :: bp, ep, wp, u0, u
-  real, dimension(p_cache_size) :: apr, api
-  real, dimension(3,p_cache_size) :: apr_grad, api_grad
-  real, dimension(0:1, p_cache_size) :: weight
-  real, dimension(p_cache_size) :: pcos, psin
-  real, dimension(p_p_dim) :: du, u2, utmp
-  real :: qtmh, qtmh_e, qtmh_b, idt, gam, ostq, ipsi, dpsi, w, ir, gam_corr, tmp, dt
+  type(ufield), dimension(:), pointer :: chi_re => null(), chi_im => null()
   complex(kind=DB) :: phase, phase0
+  real, dimension(LP:UP) :: wt ! interpolation weight
+  real :: pos_norm ! normalized position
+  real :: idr, ir
+  integer(kind=LG) :: ptrcur, pp
+  integer :: i, j, nn, noff, nrp, np, mode, max_mode
+  
+  character(len=18), save :: sname = 'deposit_chi_part2d'
 
   call write_dbg(cls_name, sname, cls_level, 'starts')
   call start_tprof( 'deposit 2D particles' )
 
-  ef_re  => ef%get_rf_re();  ef_im  => ef%get_rf_im()
-  bf_re  => bf%get_rf_re();  bf_im  => bf%get_rf_im()
-  cu_re  => cu%get_rf_re();  cu_im  => cu%get_rf_im()
-  dcu_re => dcu%get_rf_re(); dcu_im => dcu%get_rf_im()
-  amu_re => amu%get_rf_re(); amu_im => amu%get_rf_im()
-
-  dt = this%dt
-  if (present(dt_)) dt = dt_
-
-  idt = 1.0 / dt
-  qtmh = 0.5 * this%qbm * dt
-  max_mode = ef%get_max_mode()
-
-  noff = cu_re(0)%get_noff(1)
-  nrp  = cu_re(0)%get_ndp(1)
-
-  cu0  => cu_re(0)%get_f1()
-  dcu0 => dcu_re(0)%get_f1()
-  amu0 => amu_re(0)%get_f1()
-
-  ar_re => af%get_cfr_re()
-  ar_im => af%get_cfr_im()
-  ai_re => af%get_cfi_re()
-  ai_im => af%get_cfi_im()
-
-  ar_grad_re => af%ar_grad_re
-  ar_grad_im => af%ar_grad_im
-  ai_grad_re => af%ai_grad_re
-  ai_grad_im => af%ai_grad_im
+  chi_re => chi%get_rf_re()
+  chi_im => chi%get_rf_im()
+  idr      = 1.0 / this%dr
+  max_mode = chi%get_max_mode()
+  noff     = chi_re(0)%get_noff(1)
+  nrp      = chi_re(0)%get_ndp(1)
 
   do ptrcur = 1, this%npp, p_cache_size
 
@@ -1087,123 +1233,39 @@ subroutine amjdeposit_std_pgc_part2d(this, ef, bf, af, cu, amu, dcu, dt_)
       np = p_cache_size
     endif
 
-    ! interpolate fields to particles
-    ! call interp_emf_part2d( ef_re, ef_im, bf_re, bf_im, max_mode, this%x, this%dr, &
-    !   bp, ep, np, ptrcur, p_cylindrical, weight = wt, ix = ix, pcos = cc, psin = ss )
-    ! call interp_laser_part2d( ar_re, ar_im, ai_re, ai_im, &
-    !   ar_grad_re, ar_grad_im, ai_grad_re, ai_grad_im, &
-    !   max_mode, this%x, this%dr, apr, api, apr_grad, api_grad, np, ptrcur, p_cylindrical )
-    call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, ix, pcos, psin)
-    call interp_field(ef_re, ef_im, max_mode, weight, ix, pcos, psin, np, ep)
-    call interp_field(bf_re, bf_im, max_mode, weight, ix, pcos, psin, np, bp)
-    call interp_field(ar_re, ar_im, max_mode, weight, ix, pcos, psin, np, apr)
-    call interp_field(ai_re, ai_im, max_mode, weight, ix, pcos, psin, np, api)
-    call interp_field(ar_grad_re, ar_grad_im, max_mode, weight, ix, pcos, psin, np, apr_grad)
-    call interp_field(ai_grad_re, ai_grad_im, max_mode, weight, ix, pcos, psin, np, api_grad)
-
-    ! advance the particle momentum
     pp = ptrcur
     do i = 1, np
 
-      ! calculate the correction factor for gamma due to ponderomotive force
-      gam_corr = 0.5 * this%qbm**2 * (apr(i)**2 + api(i)**2)
-
-      ! transform momentum from Cartesian to cylindrical coordinates
-      u0(1, i) = this%p(1, pp) * pcos(i) + this%p(2, pp) * psin(i)
-      u0(2, i) = this%p(2, pp) * pcos(i) - this%p(1, pp) * psin(i)
-      u0(3, i) = this%p(3, pp)
-
-      ! calculate the averaged gamma factor
-      gam = sqrt(1.0 + u0(1, i)**2 + u0(2, i)**2 + u0(3, i)**2 + gam_corr)
-
-      ! calculate wake field
-      wp(1, i) = ep(1, i) - bp(2, i)
-      wp(2, i) = ep(2, i) + bp(1, i)
-      wp(3, i) = ep(3, i)
-
-      ! TODO: There is a strict time-centered scheme for the standard-PGC pusher, which is left for future work.
-
-      ! calculate the effective electric fields
-      ! note that here the time-centered gamma is unknown, so we approximate it using the initial one.
-      tmp = 0.5 * this%qbm / gam
-      ep(1, i) = ep(1, i) - tmp * (apr(i) * apr_grad(1, i) + api(i) * api_grad(1, i))
-      ep(2, i) = ep(2, i) - tmp * (apr(i) * apr_grad(2, i) + api(i) * api_grad(2, i))
-      ep(3, i) = ep(3, i) + tmp * (apr(i) * apr_grad(3, i) + api(i) * api_grad(3, i))
-
-      ! half electric acceleration
-      qtmh_b = qtmh / (1.0 - this%qbm * this%psi(pp))
-      qtmh_e = qtmh_b * gam
-      utmp(:) = u0(:, i) + ep(:, i) * qtmh_e
-
-      ! magnetic rotation
-      bp(:, i) = bp(:, i) * qtmh_b
-      u(1, i) = utmp(1) + utmp(2) * bp(3, i) - utmp(3) * bp(2, i)
-      u(2, i) = utmp(2) + utmp(3) * bp(1, i) - utmp(1) * bp(3, i)
-      u(3, i) = utmp(3) + utmp(1) * bp(2, i) - utmp(2) * bp(1, i)
-
-      ostq = 2.0 / (1.0 + bp(1, i)**2 + bp(2, i)**2 + bp(3, i)**2)
-      bp(:, i) = bp(:, i) * ostq
-
-      utmp(1) = utmp(1) + u(2, i) * bp(3, i) - u(3, i) * bp(2, i)
-      utmp(2) = utmp(2) + u(3, i) * bp(1, i) - u(1, i) * bp(3, i)
-      utmp(3) = utmp(3) + u(1, i) * bp(2, i) - u(2, i) * bp(1, i)
-
-      ! half electric acceleration
-      gam = sqrt(1.0 + utmp(1)**2 + utmp(2)**2 + utmp(3)**2 + gam_corr)
-      qtmh_e = qtmh_b * gam
-      u(:, i) = utmp(:) + ep(:, i) * qtmh_e
-
-      ! calculate and store time-centered values
-      ! deposit momentum flux, acceleration density, and current density
-      du(1) = idt * (u(1, i) - u0(1, i))
-      du(2) = idt * (u(2, i) - u0(2, i))
-      
-      u(:, i)  = 0.5 * (u(:, i) + u0(:, i))
-      ! store the time-centered gamma
-      this%gamma(pp) = sqrt(1.0 + u(1, i)**2 + u(2, i)**2 + u(3, i)**2 + gam_corr)
-
-      ipsi = 1.0 / (1.0 - this%qbm * this%psi(pp))
-      dpsi = this%qbm * (wp(3, i) - (wp(1, i) * u(1, i) + wp(2, i) * u(2, i)) * ipsi)
+      ! get normalized position
+      pos_norm = sqrt( this%x(1, pp)**2 + this%x(2, pp)**2 ) * idr
+      phase0 = cmplx( this%x(1, pp), -this%x(2, pp) ) / pos_norm * idr
 
-      du(1) = du(1) + u(1, i) * dpsi * ipsi
-      du(2) = du(2) + u(2, i) * dpsi * ipsi
+      ! calculate in-cell position
+      nn = floor( pos_norm )
+      pos_norm = pos_norm - real(nn)
+      nn = nn - noff + 1
 
-      u2(1) = u(1, i) * u(1, i) * ipsi
-      u2(2) = u(1, i) * u(2, i) * ipsi
-      u2(3) = u(2, i) * u(2, i) * ipsi
+#ifdef QUADRATIC
+      nn = nn + floor(pos_norm-0.5) + 1
+      pos_norm = pos_norm - floor(pos_norm-0.5) - 1
+#endif
 
-      phase0 = cmplx(pcos(i), -psin(i))
-      phase  = cmplx(1.0, 0.0) * this%q(pp) * ipsi
+      call SPLINE( pos_norm, wt )
 
+      ! phase = -1.0 * cmplx( 1.0, 0.0 ) * this%qbm * this%q(pp) / this%psi(pp)
+      phase = -1.0 * cmplx( 1.0, 0.0 ) * this%qbm * this%q(pp) / (1.0 - this%qbm * this%psi(pp))
       ! deposit m = 0 mode
-      do j = 0, 1
-        w = weight(j, i) * real(phase)
-        cu0(1:3, ix(i)+j)  = cu0(1:3, ix(i) + j)  + w * u(1:3, i)
-        dcu0(1:2, ix(i)+j) = dcu0(1:2, ix(i) + j) + w * du(1:2)
-        amu0(1:3, ix(i)+j) = amu0(1:3, ix(i) + j) + w * u2(1:3)
+      do j = LP, UP
+        chi_re(0)%f1(1,nn+j) = chi_re(0)%f1(1,nn+j) + wt(j) * real(phase)
       enddo
 
-      ! deposit m > 0 mode
+      ! deposit m > 0 modes
       do mode = 1, max_mode
-
-        cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-        dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-        amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
-
         phase = phase * phase0
-
-        do j = 0, 1
-          w = weight(j,i) * real(phase)
-          cur( 1:3, ix(i)+j )  = cur( 1:3, ix(i)+j )  + w * u(1:3,i)
-          dcur( 1:2, ix(i)+j ) = dcur( 1:2, ix(i)+j ) + w * du(1:2)
-          amur( 1:3, ix(i)+j ) = amur( 1:3, ix(i)+j ) + w * u2(1:3)
-
-          w = weight(j,i) * aimag(phase)
-          cui( 1:3, ix(i)+j )  = cui( 1:3, ix(i)+j )  + w * u(1:3,i)
-          dcui( 1:2, ix(i)+j ) = dcui( 1:2, ix(i)+j ) + w * du(1:2)
-          amui( 1:3, ix(i)+j ) = amui( 1:3, ix(i)+j ) + w * u2(1:3)
+        do j = LP, UP
+          chi_re(mode)%f1(1,nn+j) = chi_re(mode)%f1(1,nn+j) + wt(j) * real(phase)
+          chi_im(mode)%f1(1,nn+j) = chi_im(mode)%f1(1,nn+j) + wt(j) * aimag(phase)
         enddo
-
       enddo
 
       pp = pp + 1
@@ -1211,92 +1273,69 @@ subroutine amjdeposit_std_pgc_part2d(this, ef, bf, af, cu, amu, dcu, dt_)
 
   enddo
 
-  if ( noff == 0 ) then
-
-    ! guard cells on the axis are useless
-    cu0(1:3,0)  = 0.0
-    dcu0(1:2,0) = 0.0
-    amu0(1:3,0) = 0.0
-
-    cu0(1:2,1)  = 0.0; cu0(3,1) = 8.0 * cu0(3,1)
-    dcu0(1:2,1) = 0.0
-    amu0(1:3,1) = 0.0
-
-    do mode = 1, max_mode
+  if ( id_proc_loc() == 0 ) then
+    
+! Correct the charge on axis by adding the value on the guard cell 0
+#ifdef QUADRATIC
+    chi_re(0)%f1(1,1) = chi_re(0)%f1(1,1) + chi_re(0)%f1(1,0)
+#endif
 
-      cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-      dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-      amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    chi_re(0)%f1(1,0) = 0.0 ! guard cell is useless on axis
+    ! chi_re(0)%f1(1,1) = 8.0 * chi_re(0)%f1(1,1) ! is this correct???
+#ifdef LINEAR
+    chi_re(0)%f1(1,1) = chi_re(0)%f1(1,1) * this%deposit_ax_corr(1,1)
+#endif
 
-      ! guard cells on the axis are useless
-      cur(1:3,0)  = 0.0; cui(1:3,0)  = 0.0
-      dcur(1:2,0) = 0.0; dcui(1:2,0) = 0.0
-      amur(1:3,0) = 0.0; amui(1:3,0) = 0.0
-        
-      if ( mode == 1 ) then
-        cur(1:2,1)  = 8.0 * cur(1:2,1); cur(3,1) = 0.0
-        dcur(1:2,1) = 8.0 * dcur(1:2,1)
-        amur(1:3,1) = 0.0
-        cui(1:2,1)  = 8.0 * cui(1:2,1); cui(3,1) = 0.0
-        dcui(1:2,1) = 8.0 * dcui(1:2,1)
-        amui(1:3,1) = 0.0
-      elseif ( mode == 2 ) then
-        cur(1:3,1)  = 0.0
-        dcur(1:2,1) = 0.0
-        amur(1:3,1) = 8.0 * amur(1:3,1)
-        cui(1:3,1)  = 0.0
-        dcui(1:2,1) = 0.0
-        amui(1:3,1) = 8.0 * amui(1:3,1)
-      else
-        cur(1:3,1)  = 0.0
-        dcur(1:2,1) = 0.0
-        amur(1:3,1) = 0.0
-        cui(1:3,1)  = 0.0
-        dcui(1:2,1) = 0.0
-        amui(1:3,1) = 0.0
-        endif
-    enddo
+#ifdef QUADRATIC
+    chi_re(0)%f1(1,1) = chi_re(0)%f1(1,1) * this%deposit_ax_corr(1,1)
+    chi_re(0)%f1(1,2) = chi_re(0)%f1(1,2) * this%deposit_ax_corr(1,2)
+#endif
 
     do j = 2, nrp + 1
       ir = 1.0 / ( j + noff - 1 )
-      cu0(1:3,j)  = cu0(1:3,j)  * ir
-      dcu0(1:2,j) = dcu0(1:2,j) * ir
-      amu0(1:3,j) = amu0(1:3,j) * ir
+      chi_re(0)%f1(1,j) = chi_re(0)%f1(1,j) * ir
     enddo
 
     do mode = 1, max_mode
-
-      cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-      dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-      amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+      ! guard cell is useless on axis
+      chi_re(mode)%f1(1,0) = 0.0
+      chi_im(mode)%f1(1,0) = 0.0
+      ! on-axis values are all zeros
+      chi_re(mode)%f1(1,1) = 0.0
+      chi_im(mode)%f1(1,1) = 0.0
+#ifdef QUADRATIC
+      chi_re(mode)%f1(1,2) = chi_re(mode)%f1(1,2) * this%deposit_ax_corr(1,2)
+      chi_im(mode)%f1(1,2) = chi_im(mode)%f1(1,2) * this%deposit_ax_corr(1,2)
+#endif
 
       do j = 2, nrp + 1
         ir = 1.0 / ( j + noff - 1 )
-        cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
-        dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
-        amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
+        chi_re(mode)%f1(1,j) = chi_re(mode)%f1(1,j) * ir
+        chi_im(mode)%f1(1,j) = chi_im(mode)%f1(1,j) * ir
       enddo
     enddo
 
   else
-
+    ! #ifdef QUADRATIC
+    ! ! Correct the chi on the boundary by adding the value on the guard cell nrp+2
+    !     if (id_proc_loc() == num_procs_loc() - 1) then
+    !         chi_re(0)%f1(1,nrp) = chi_re(0)%f1(1,nrp) + chi_re(0)%f1(1,nrp+2)            
+    !         do mode = 1, max_mode
+    !             chi_re(mode)%f1(1,nrp) = chi_re(mode)%f1(1,nrp) + chi_re(mode)%f1(1,nrp+2)
+    !             chi_im(mode)%f1(1,nrp) = chi_im(mode)%f1(1,nrp) + chi_im(mode)%f1(1,nrp+2)
+    !         enddo
+    !     endif
+    ! #endif
     do j = 0, nrp + 1
-        ir = 1.0 / ( j + noff - 1 )
-        cu0(1:3,j)  = cu0(1:3,j)  * ir
-        dcu0(1:2,j) = dcu0(1:2,j) * ir
-        amu0(1:3,j) = amu0(1:3,j) * ir
+      ir = 1.0 / ( j + noff - 1 )
+      chi_re(0)%f1(1,j) = chi_re(0)%f1(1,j) * ir
     enddo
 
     do mode = 1, max_mode
-      cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-      dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-      amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
-
-      do j = 0, nrp+1
+      do j = 0, nrp + 1
         ir = 1.0 / ( j + noff - 1 )
-        cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
-        dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
-        amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
+        chi_re(mode)%f1(1,j) = chi_re(mode)%f1(1,j) * ir
+        chi_im(mode)%f1(1,j) = chi_im(mode)%f1(1,j) * ir
       enddo
     enddo
 
@@ -1304,1285 +1343,1714 @@ subroutine amjdeposit_std_pgc_part2d(this, ef, bf, af, cu, amu, dcu, dt_)
 
   call stop_tprof( 'deposit 2D particles' )
   call write_dbg(cls_name, sname, cls_level, 'ends')
-  
-end subroutine amjdeposit_std_pgc_part2d
 
-subroutine amjdeposit_robust_pgc_part2d(this, ef, bf, af, cu, amu, dcu, dt_)
-! deposit the current, acceleration and momentum flux
-
-  implicit none
-
-  class(part2d), intent(inout) :: this
-  class(field), intent(in) :: cu, amu, dcu
-  class(field), intent(in) :: ef, bf
-  class(field_laser), intent(in) :: af
-  real, intent(in), optional :: dt_
-  ! local data
-  character(len=32), save :: sname = 'amjdeposit_robust_pgc_part2d'
-  type(ufield), dimension(:), pointer :: ef_re => null(), ef_im => null()
-  type(ufield), dimension(:), pointer :: bf_re => null(), bf_im => null()
-  type(ufield), dimension(:), pointer :: cu_re => null(), cu_im => null()
-  type(ufield), dimension(:), pointer :: dcu_re => null(), dcu_im => null()
-  type(ufield), dimension(:), pointer :: amu_re => null(), amu_im => null()
-  type(ufield), dimension(:), pointer :: ar_re, ar_im, ai_re, ai_im
-  type(ufield), dimension(:), pointer :: ar_grad_re, ar_grad_im, ai_grad_re, ai_grad_im
-
-  real, dimension(:,:), pointer :: cu0 => null(), dcu0 => null(), amu0 => null()
-  real, dimension(:,:), pointer :: cur => null(), dcur => null(), amur => null()
-  real, dimension(:,:), pointer :: cui => null(), dcui => null(), amui => null()
-
-  integer(kind=LG) :: ptrcur, pp
-  integer :: i, j, noff, nrp, np, mode, max_mode
-  integer, dimension(p_cache_size) :: ix
-  real, dimension(p_p_dim, p_cache_size) :: bp, ep, wp, u0, u
-  real, dimension(p_cache_size) :: apr, api
-  real, dimension(3,p_cache_size) :: apr_grad, api_grad
-  real, dimension(0:1, p_cache_size) :: weight
-  real, dimension(p_cache_size) :: pcos, psin
-  real, dimension(p_p_dim) :: du, u2, utmp
-  real :: qtmh, qtmh_e, qtmh_b, idt, gam, ostq, ipsi, dpsi, w, ir, gam_corr, tmp, dt
-  complex(kind=DB) :: phase, phase0
-
-  call write_dbg(cls_name, sname, cls_level, 'starts')
-  call start_tprof( 'deposit 2D particles' )
-
-  ef_re  => ef%get_rf_re();  ef_im  => ef%get_rf_im()
-  bf_re  => bf%get_rf_re();  bf_im  => bf%get_rf_im()
-  cu_re  => cu%get_rf_re();  cu_im  => cu%get_rf_im()
-  dcu_re => dcu%get_rf_re(); dcu_im => dcu%get_rf_im()
-  amu_re => amu%get_rf_re(); amu_im => amu%get_rf_im()
-
-  dt = this%dt
-  if (present(dt_)) dt = dt_
-
-  idt = 1.0 / dt
-  qtmh = 0.5 * this%qbm * dt
-  max_mode = ef%get_max_mode()
-
-  noff = cu_re(0)%get_noff(1)
-  nrp  = cu_re(0)%get_ndp(1)
-
-  cu0  => cu_re(0)%get_f1()
-  dcu0 => dcu_re(0)%get_f1()
-  amu0 => amu_re(0)%get_f1()
+end subroutine CHIDEPOSIT
 
-  ar_re => af%get_cfr_re()
-  ar_im => af%get_cfr_im()
-  ai_re => af%get_cfi_re()
-  ai_im => af%get_cfi_im()
 
-  ar_grad_re => af%ar_grad_re
-  ar_grad_im => af%ar_grad_im
-  ai_grad_re => af%ai_grad_re
-  ai_grad_im => af%ai_grad_im
-
-  do ptrcur = 1, this%npp, p_cache_size
-
-    ! check if last copy of table and set np
-    if( ptrcur + p_cache_size > this%npp ) then
-      np = this%npp - ptrcur + 1
-    else
-      np = p_cache_size
-    endif
-
-    ! interpolate fields to particles
-    call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, ix, pcos, psin)
-    call interp_field(ef_re, ef_im, max_mode, weight, ix, pcos, psin, np, ep)
-    call interp_field(bf_re, bf_im, max_mode, weight, ix, pcos, psin, np, bp)
-    call interp_field(ar_re, ar_im, max_mode, weight, ix, pcos, psin, np, apr)
-    call interp_field(ai_re, ai_im, max_mode, weight, ix, pcos, psin, np, api)
-    call interp_field(ar_grad_re, ar_grad_im, max_mode, weight, ix, pcos, psin, np, apr_grad)
-    call interp_field(ai_grad_re, ai_grad_im, max_mode, weight, ix, pcos, psin, np, api_grad)
-
-    ! advance the particle momentum
-    pp = ptrcur
-    do i = 1, np
-
-      ! calculate the correction factor for gamma due to ponderomotive force
-      gam_corr = 0.5 * this%qbm**2 * ( apr(i)**2 + api(i)**2 )
-
-      ! transform momentum from Cartesian to cylindrical coordinates
-      u0(1,i) = this%p(1,pp) * pcos(i) + this%p(2,pp) * psin(i)
-      u0(2,i) = this%p(2,pp) * pcos(i) - this%p(1,pp) * psin(i)
-      u0(3,i) = this%p(3,pp)
-
-      ! calculate the averaged gamma factor
-      gam = sqrt( 1.0 + u0(1,i)**2 + u0(2,i)**2 + u0(3,i)**2 + gam_corr )
-
-      ! calculate wake field
-      wp(1,i) = ep(1,i) - bp(2,i)
-      wp(2,i) = ep(2,i) + bp(1,i)
-      wp(3,i) = ep(3,i)
-
-      ! calculate the effective electric fields
-      ! note that here the time-centered gamma is unknown, so we approximate it using the initial one.
-      tmp = 0.5 * this%qbm / gam
-      ep(1,i) = ep(1,i) - tmp * ( apr(i) * apr_grad(1,i) + api(i) * api_grad(1,i) )
-      ep(2,i) = ep(2,i) - tmp * ( apr(i) * apr_grad(2,i) + api(i) * api_grad(2,i) )
-      ep(3,i) = ep(3,i) + tmp * ( apr(i) * apr_grad(3,i) + api(i) * api_grad(3,i) )
-
-      ! half electric acceleration
-      qtmh_e = qtmh * gam / ( gam - u0(3,i) )
-      utmp(:) = u0(:,i) + ep(:,i) * qtmh_e
-
-      ! magnetic rotation
-      gam = sqrt( 1.0 + utmp(1)**2 + utmp(2)**2 + utmp(3)**2 + gam_corr )
-      qtmh_b = qtmh / ( gam - utmp(3) )
-      bp(:,i) = bp(:,i) * qtmh_b
-
-      u(1,i) = utmp(1) + utmp(2) * bp(3,i) - utmp(3) * bp(2,i)
-      u(2,i) = utmp(2) + utmp(3) * bp(1,i) - utmp(1) * bp(3,i)
-      u(3,i) = utmp(3) + utmp(1) * bp(2,i) - utmp(2) * bp(1,i)
-
-      ostq = 2.0 / ( 1.0 + bp(1,i)**2 + bp(2,i)**2 + bp(3,i)**2 )
-      bp(:,i) = bp(:,i) * ostq
-
-      utmp(1) = utmp(1) + u(2,i) * bp(3,i) - u(3,i) * bp(2,i)
-      utmp(2) = utmp(2) + u(3,i) * bp(1,i) - u(1,i) * bp(3,i)
-      utmp(3) = utmp(3) + u(1,i) * bp(2,i) - u(2,i) * bp(1,i)
-
-      ! half electric acceleration
-      gam = sqrt( 1.0 + utmp(1)**2 + utmp(2)**2 + utmp(3)**2 + gam_corr )
-      qtmh_e = qtmh * gam / ( gam - utmp(3) )
-      u(:,i) = utmp(:) + ep(:,i) * qtmh_e
-
-      ! calculate and store time-centered values
-      ! deposit momentum flux, acceleration density, and current density
-      du(1) = idt * ( u(1,i) - u0(1,i) )
-      du(2) = idt * ( u(2,i) - u0(2,i) )
-      
-      u(:,i)  = 0.5 * ( u(:,i) + u0(:,i) )
-      ! store the time-centered gamma and (1-(q/m)*psi)
-      this%gamma(pp) = sqrt( 1.0 + u(1,i)**2 + u(2,i)**2 + u(3,i)**2 + gam_corr )
-      ipsi = 1.0 / (this%gamma(pp) - u(3,i))
-      this%psi(pp) = (1.0 - 1.0 / ipsi) / this%qbm
-
-      dpsi = this%qbm * ( wp(3,i) - ( wp(1,i) * u(1,i) + wp(2,i) * u(2,i) ) * ipsi )
-
-      du(1) = du(1) + u(1,i) * dpsi * ipsi
-      du(2) = du(2) + u(2,i) * dpsi * ipsi
-
-      u2(1) = u(1,i) * u(1,i) * ipsi
-      u2(2) = u(1,i) * u(2,i) * ipsi
-      u2(3) = u(2,i) * u(2,i) * ipsi
-
-      phase0 = cmplx( pcos(i), -psin(i) )
-      phase  = cmplx( 1.0, 0.0 ) * this%q(pp) * ipsi
-
-      ! deposit m = 0 mode
-      do j = 0, 1
-        w = weight(j,i) * real(phase)
-        cu0( 1:3, ix(i)+j )  = cu0( 1:3, ix(i)+j )  + w * u(1:3,i)
-        dcu0( 1:2, ix(i)+j ) = dcu0( 1:2, ix(i)+j ) + w * du(1:2)
-        amu0( 1:3, ix(i)+j ) = amu0( 1:3, ix(i)+j ) + w * u2(1:3)
-      enddo
-
-      ! deposit m > 0 mode
-      do mode = 1, max_mode
-
-        cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-        dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-        amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
-
-        phase = phase * phase0
-
-        do j = 0, 1
-          w = weight(j,i) * real(phase)
-          cur( 1:3, ix(i)+j )  = cur( 1:3, ix(i)+j )  + w * u(1:3,i)
-          dcur( 1:2, ix(i)+j ) = dcur( 1:2, ix(i)+j ) + w * du(1:2)
-          amur( 1:3, ix(i)+j ) = amur( 1:3, ix(i)+j ) + w * u2(1:3)
-
-          w = weight(j,i) * aimag(phase)
-          cui( 1:3, ix(i)+j )  = cui( 1:3, ix(i)+j )  + w * u(1:3,i)
-          dcui( 1:2, ix(i)+j ) = dcui( 1:2, ix(i)+j ) + w * du(1:2)
-          amui( 1:3, ix(i)+j ) = amui( 1:3, ix(i)+j ) + w * u2(1:3)
+subroutine AMJ_STD(this, ef, bf, cu, amu, dcu, dt_)
+    ! deposit the current, acceleration and momentum flux
+    
+      implicit none
+    
+      class(part2d), intent(inout) :: this
+      class(field), intent(in) :: cu, amu, dcu
+      class(field), intent(in) :: ef, bf
+      real, intent(in), optional :: dt_
+      ! local data
+      character(len=32), save :: sname = 'amjdeposit_std_part2d'
+      type(ufield), dimension(:), pointer :: ef_re => null(), ef_im => null()
+      type(ufield), dimension(:), pointer :: bf_re => null(), bf_im => null()
+      type(ufield), dimension(:), pointer :: cu_re => null(), cu_im => null()
+      type(ufield), dimension(:), pointer :: dcu_re => null(), dcu_im => null()
+      type(ufield), dimension(:), pointer :: amu_re => null(), amu_im => null()
+    
+      real, dimension(:,:), pointer :: cu0 => null(), dcu0 => null(), amu0 => null()
+      real, dimension(:,:), pointer :: cur => null(), dcur => null(), amur => null()
+      real, dimension(:,:), pointer :: cui => null(), dcui => null(), amui => null()
+    
+      integer(kind=LG) :: ptrcur, pp
+      integer :: i, j, noff, nrp, np, mode, max_mode
+      integer, dimension(p_cache_size) :: ix
+      real, dimension(p_p_dim, p_cache_size) :: bp, ep, wp, u0, u
+      real, dimension(:, :), allocatable :: weight
+      real, dimension(p_cache_size) :: pcos, psin
+      real, dimension(p_p_dim) :: du, u2, utmp
+      real :: qtmh, qtmh1, qtmh2, idt, gam, ostq, ipsi, dpsi, w, ir, dt
+      complex(kind=DB) :: phase, phase0
+    
+      call write_dbg(cls_name, sname, cls_level, 'starts')
+      call start_tprof( 'deposit 2D particles' )
+      allocate(weight(LP:UP, p_cache_size))
+    
+      ef_re  => ef%get_rf_re();  ef_im  => ef%get_rf_im()
+      bf_re  => bf%get_rf_re();  bf_im  => bf%get_rf_im()
+      cu_re  => cu%get_rf_re();  cu_im  => cu%get_rf_im()
+      dcu_re => dcu%get_rf_re(); dcu_im => dcu%get_rf_im()
+      amu_re => amu%get_rf_re(); amu_im => amu%get_rf_im()
+    
+      dt = this%dt
+      if (present(dt_)) dt = dt_
+    
+      idt = 1.0 / dt
+      qtmh = 0.5 * this%qbm * dt
+      max_mode = ef%get_max_mode()
+    
+      noff = cu_re(0)%get_noff(1)
+      nrp  = cu_re(0)%get_ndp(1)
+    
+      cu0  => cu_re(0)%get_f1()
+      dcu0 => dcu_re(0)%get_f1()
+      amu0 => amu_re(0)%get_f1()
+    
+      do ptrcur = 1, this%npp, p_cache_size
+    
+        ! check if last copy of table and set np
+        if( ptrcur + p_cache_size > this%npp ) then
+          np = this%npp - ptrcur + 1
+        else
+          np = p_cache_size
+        endif
+    
+        ! interpolate fields to particles
+        call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, ix, pcos, psin)
+        call interp_field(ef_re, ef_im, max_mode, weight, ix, pcos, psin, np, ep)
+        call interp_field(bf_re, bf_im, max_mode, weight, ix, pcos, psin, np, bp)
+    
+        pp = ptrcur
+        do i = 1, np
+    
+          ! calculate wake field
+          wp(1, i) = ep(1, i) - bp(2, i)
+          wp(2, i) = ep(2, i) + bp(1, i)
+          wp(3, i) = ep(3, i)
+    
+          ! transform momentum from Cartesian to cylindrical coordinates
+          u0(1, i) = this%p(1, pp) * pcos(i) + this%p(2, pp) * psin(i)
+          u0(2, i) = this%p(2, pp) * pcos(i) - this%p(1, pp) * psin(i)
+          u0(3, i) = this%p(3, pp)
+    
+          ! TODO: There is a strict time-centered scheme and it is left for future work.
+    
+          ! normalize the E and B fields
+          gam = sqrt(1.0 + u0(1, i)**2 + u0(2, i)**2 + u0(3, i)**2)
+          qtmh2 = qtmh / (1.0 - this%qbm * this%psi(pp))
+          qtmh1 = qtmh2 * gam
+          bp(:, i) = bp(:, i) * qtmh2
+    
+          ! half electric acceleration
+          utmp(:) = u0(:, i) + ep(:, i) * qtmh1
+    
+          ! magnetic rotation
+          u(1, i) = utmp(1) + utmp(2) * bp(3, i) - utmp(3) * bp(2, i)
+          u(2, i) = utmp(2) + utmp(3) * bp(1, i) - utmp(1) * bp(3, i)
+          u(3, i) = utmp(3) + utmp(1) * bp(2, i) - utmp(2) * bp(1, i)
+    
+          ostq = 2.0 / (1.0 + bp(1, i)**2 + bp(2, i)**2 + bp(3, i)**2)
+          bp(:,i) = bp(:,i) * ostq
+    
+          utmp(1) = utmp(1) + u(2, i) * bp(3, i) - u(3, i) * bp(2, i)
+          utmp(2) = utmp(2) + u(3, i) * bp(1, i) - u(1, i) * bp(3, i)
+          utmp(3) = utmp(3) + u(1, i) * bp(2, i) - u(2, i) * bp(1, i)
+    
+          ! half electric acceleration
+          gam = sqrt(1.0 + utmp(1)**2 + utmp(2)**2 + utmp(3)**2)
+          qtmh1 = qtmh2 * gam
+          u(:, i) = utmp(:) + ep(:, i) * qtmh1
+    
+          pp = pp + 1
         enddo
-
+    
+        ! calculate and store time-centered values
+        ! deposit momentum flux, acceleration density, and current density
+        pp = ptrcur
+        do i = 1, np
+    
+          du(1) = idt * ( u(1,i) - u0(1,i) )
+          du(2) = idt * ( u(2,i) - u0(2,i) )
+          
+          u(:, i)  = 0.5 * ( u(:, i) + u0(:, i) )
+          this%gamma(pp) = sqrt(1.0 + u(1, i)**2 + u(2, i)**2 + u(3, i)**2)
+    
+          ipsi = 1.0 / (1.0 - this%qbm * this%psi(pp))
+          dpsi = this%qbm * (wp(3, i) - (wp(1, i) * u(1, i) + wp(2, i) * u(2, i)) * ipsi)
+    
+          du(1) = du(1) + u(1, i) * dpsi * ipsi
+          du(2) = du(2) + u(2, i) * dpsi * ipsi
+    
+          u2(1) = u(1, i) * u(1, i) * ipsi
+          u2(2) = u(1, i) * u(2, i) * ipsi
+          u2(3) = u(2, i) * u(2, i) * ipsi
+    
+          phase0 = cmplx(pcos(i), -psin(i))
+          phase  = cmplx(1.0, 0.0) * this%q(pp) * ipsi
+    
+          ! deposit m = 0 mode
+          do j = LP, UP
+            w = weight(j, i) * real(phase)
+            cu0(1:3, ix(i) + j)  = cu0(1:3, ix(i) + j)  + w * u(1:3, i)
+            dcu0(1:2, ix(i) + j) = dcu0(1:2, ix(i) + j) + w * du(1:2)
+            amu0(1:3, ix(i) + j) = amu0(1:3, ix(i) + j) + w * u2(1:3)
+          enddo
+    
+          ! deposit m > 0 mode
+          do mode = 1, max_mode
+    
+            cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+            dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+            amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+            phase = phase * phase0
+    
+            do j = LP, UP
+              w = weight(j,i) * real(phase)
+              cur( 1:3, ix(i)+j )  = cur( 1:3, ix(i)+j )  + w * u(1:3,i)
+              dcur( 1:2, ix(i)+j ) = dcur( 1:2, ix(i)+j ) + w * du(1:2)
+              amur( 1:3, ix(i)+j ) = amur( 1:3, ix(i)+j ) + w * u2(1:3)
+    
+              w = weight(j,i) * aimag(phase)
+              cui( 1:3, ix(i)+j )  = cui( 1:3, ix(i)+j )  + w * u(1:3,i)
+              dcui( 1:2, ix(i)+j ) = dcui( 1:2, ix(i)+j ) + w * du(1:2)
+              amui( 1:3, ix(i)+j ) = amui( 1:3, ix(i)+j ) + w * u2(1:3)
+            enddo
+    
+          enddo
+    
+          pp = pp + 1
+        enddo
+    
       enddo
-
-      pp = pp + 1
-    enddo
-
-  enddo
-
-  if ( noff == 0 ) then
-
-    ! guard cells on the axis are useless
-    cu0(1:3,0)  = 0.0
-    dcu0(1:2,0) = 0.0
-    amu0(1:3,0) = 0.0
-
-    cu0(1:2,1)  = 0.0; cu0(3,1) = 8.0 * cu0(3,1)
-    dcu0(1:2,1) = 0.0
-    amu0(1:3,1) = 0.0
-
-    do mode = 1, max_mode
-
-      cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-      dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-      amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
-
-      ! guard cells on the axis are useless
-      cur(1:3,0)  = 0.0; cui(1:3,0)  = 0.0
-      dcur(1:2,0) = 0.0; dcui(1:2,0) = 0.0
-      amur(1:3,0) = 0.0; amui(1:3,0) = 0.0
-        
-      if ( mode == 1 ) then
-        cur(1:2,1)  = 8.0 * cur(1:2,1); cur(3,1) = 0.0
-        dcur(1:2,1) = 8.0 * dcur(1:2,1)
-        amur(1:3,1) = 0.0
-        cui(1:2,1)  = 8.0 * cui(1:2,1); cui(3,1) = 0.0
-        dcui(1:2,1) = 8.0 * dcui(1:2,1)
-        amui(1:3,1) = 0.0
-      elseif ( mode == 2 ) then
-        cur(1:3,1)  = 0.0
-        dcur(1:2,1) = 0.0
-        amur(1:3,1) = 8.0 * amur(1:3,1)
-        cui(1:3,1)  = 0.0
-        dcui(1:2,1) = 0.0
-        amui(1:3,1) = 8.0 * amui(1:3,1)
+    
+      if ( id_proc_loc() ==  0 ) then
+! Correct the used values on axis by adding the value on the guard cell 0
+#ifdef QUADRATIC
+    cu0(3,1) = cu0(3,1) + cu0(3,0)
+#endif
+        ! guard cells on the axis are useless
+        cu0(1:3,0)  = 0.0
+        dcu0(1:2,0) = 0.0
+        amu0(1:3,0) = 0.0
+    
+        cu0(1:2,1)  = 0.0; cu0(3,1) = 8.0 * cu0(3,1)
+        dcu0(1:2,1) = 0.0
+        amu0(1:3,1) = 0.0
+    
+        do mode = 1, max_mode
+    
+          cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+          dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+          amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+          ! guard cells on the axis are useless
+          cur(1:3,0)  = 0.0; cui(1:3,0)  = 0.0
+          dcur(1:2,0) = 0.0; dcui(1:2,0) = 0.0
+          amur(1:3,0) = 0.0; amui(1:3,0) = 0.0
+           
+          if ( mode == 1 ) then
+#ifdef QUADRATIC
+          cur(1:2,1) = cur(1:2,1) + cur(1:2,0)
+          dcur(1:2,1) = dcur(1:2,1) + dcur(1:2,0)
+          cui(1:2,1) = cui(1:2,1) + cui(1:2,0)
+          dcui(1:2,1) = dcui(1:2,1) + dcui(1:2,0)
+#endif
+            cur(1:2,1)  = 8.0 * cur(1:2,1); cur(3,1) = 0.0
+            dcur(1:2,1) = 8.0 * dcur(1:2,1)
+            amur(1:3,1) = 0.0
+            cui(1:2,1)  = 8.0 * cui(1:2,1); cui(3,1) = 0.0
+            dcui(1:2,1) = 8.0 * dcui(1:2,1)
+            amui(1:3,1) = 0.0
+          elseif ( mode == 2 ) then
+#ifdef QUADRATIC
+            amur(1:3,1) = amur(1:3,1) + amur(1:3,0)
+            amui(1:3,1) = amui(1:3,1) + amui(1:3,0)
+#endif
+            cur(1:3,1)  = 0.0
+            dcur(1:2,1) = 0.0
+            amur(1:3,1) = 8.0 * amur(1:3,1)
+            cui(1:3,1)  = 0.0
+            dcui(1:2,1) = 0.0
+            amui(1:3,1) = 8.0 * amui(1:3,1)
+          else
+            cur(1:3,1)  = 0.0
+            dcur(1:2,1) = 0.0
+            amur(1:3,1) = 0.0
+            cui(1:3,1)  = 0.0
+            dcui(1:2,1) = 0.0
+            amui(1:3,1) = 0.0
+           endif
+        enddo
+    
+        do j = 2, nrp + 1
+          ir = 1.0 / ( j + noff - 1 )
+          cu0(1:3,j)  = cu0(1:3,j)  * ir
+          dcu0(1:2,j) = dcu0(1:2,j) * ir
+          amu0(1:3,j) = amu0(1:3,j) * ir
+        enddo
+    
+        do mode = 1, max_mode
+    
+          cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+          dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+          amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+          do j = 2, nrp + 1
+            ir = 1.0 / ( j + noff - 1 )
+            cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
+            dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
+            amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
+          enddo
+        enddo
+    
       else
-        cur(1:3,1)  = 0.0
-        dcur(1:2,1) = 0.0
-        amur(1:3,1) = 0.0
-        cui(1:3,1)  = 0.0
-        dcui(1:2,1) = 0.0
-        amui(1:3,1) = 0.0
-        endif
-    enddo
-
-    do j = 2, nrp + 1
-      ir = 1.0 / ( j + noff - 1 )
-      cu0(1:3,j)  = cu0(1:3,j)  * ir
-      dcu0(1:2,j) = dcu0(1:2,j) * ir
-      amu0(1:3,j) = amu0(1:3,j) * ir
-    enddo
-
-    do mode = 1, max_mode
-
-      cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-      dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-      amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
-
-      do j = 2, nrp + 1
-        ir = 1.0 / ( j + noff - 1 )
-        cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
-        dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
-        amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
-      enddo
-    enddo
-
-  else
-
-    do j = 0, nrp + 1
-        ir = 1.0 / ( j + noff - 1 )
-        cu0(1:3,j)  = cu0(1:3,j)  * ir
-        dcu0(1:2,j) = dcu0(1:2,j) * ir
-        amu0(1:3,j) = amu0(1:3,j) * ir
-    enddo
-
-    do mode = 1, max_mode
-      cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
-      dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
-      amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
-
-      do j = 0, nrp+1
-        ir = 1.0 / ( j + noff - 1 )
-        cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
-        dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
-        amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
-      enddo
-    enddo
-
-  endif
-
-  call stop_tprof( 'deposit 2D particles' )
-  call write_dbg(cls_name, sname, cls_level, 'ends')
-  
-end subroutine amjdeposit_robust_pgc_part2d
-
-subroutine interp_emf_part2d( ef_re, ef_im, bf_re, bf_im, max_mode, x, dr, bp, ep, np, ptrcur, &
-  geom, weight, ix, pcos, psin )
-
-  implicit none
-
-  type(ufield), dimension(:), pointer, intent(in) :: ef_re, ef_im, bf_re, bf_im
-  integer, intent(in) :: max_mode, np, geom
-  real, intent(in) :: dr
-  real, dimension(:,:), intent(in) :: x
-  real, dimension(:,:), intent(inout) :: bp, ep
-  integer(kind=LG), intent(in) :: ptrcur
-  real, dimension(:,:), intent(out), optional :: weight
-  integer, dimension(:), intent(out), optional :: ix
-  real, dimension(:), intent(out), optional :: pcos, psin
-
-  real, dimension(:,:), pointer :: e0, b0, er, ei, br, bi
-  integer :: noff, i, j, nn, mode
-  integer(kind=LG) :: pp
-  real :: pos, idr, ph_r, ph_i, cc, ss
-  real, dimension(0:1) :: wt
-  complex(kind=DB) :: phase0, phase
-
-  idr = 1.0 / dr
-  noff = ef_re(0)%get_noff(1)
-
-  e0 => ef_re(0)%get_f1()
-  b0 => bf_re(0)%get_f1()
-
-  ep = 0.0
-  bp = 0.0
-
-  pp = ptrcur
-  do i = 1, np
-    pos = sqrt( x(1,pp)**2 + x(2,pp)**2 ) * idr
-    ! cosine and sine
-    cc = x(1,pp) / pos * idr
-    ss = x(2,pp) / pos * idr
-    phase0 = cmplx( cc, ss )
-
-    ! in-cell position
-    nn  = int( pos )
-    pos = pos - real(nn)
-
-    ! cell index
-    nn = nn - noff + 1
-
-    ! get interpolation weight factor
-    call spline_linear( pos, wt )
-
-    if ( present(weight) ) weight(:,i) = wt
-    if ( present(ix) ) ix(i) = nn
-    if ( present(pcos) ) then
-      pcos(i) = cc
-      psin(i) = ss
-    endif
-
-    ! interpolate m=0 mode
-    do j = 0, 1
-      ep(:,i) = ep(:,i) + e0(:,nn+j) * wt(j)
-      bp(:,i) = bp(:,i) + b0(:,nn+j) * wt(j)
-    enddo
-
-    ! interpolate m>0 modes
-    phase = cmplx( 1.0, 0.0 )
-    do mode = 1, max_mode
-        phase = phase * phase0
-        ph_r = 2.0 * real(phase)
-        ph_i = 2.0 * aimag(phase)
-
-        er => ef_re(mode)%get_f1()
-        ei => ef_im(mode)%get_f1()
-        br => bf_re(mode)%get_f1()
-        bi => bf_im(mode)%get_f1()
-
-        do j = 0, 1
-          ep(:,i) = ep(:,i) + ( er(:,nn+j) * ph_r - ei(:,nn+j) * ph_i ) * wt(j)
-          bp(:,i) = bp(:,i) + ( br(:,nn+j) * ph_r - bi(:,nn+j) * ph_i ) * wt(j)
+! #ifdef QUADRATIC
+!         if (id_proc_loc() == num_procs_loc() - 1) then
+!         ! Correct the values on the boundary by adding the value on the guard cell nrp+2
+!             cu0(1:3, nrp) = cu0(1:3, nrp) + cu0(1:3, nrp+2)
+!             dcu0(1:2, nrp) = dcu0(1:2, nrp) + dcu0(1:2, nrp+2)
+!             amu0(1:3, nrp) = amu0(1:3, nrp) + amu0(1:3, nrp+2)
+            
+!             do mode = 1, max_mode
+!                 cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+!                 dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+!                 amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+
+!                 cur(1:3, nrp) = cur(1:3, nrp) + cur(1:3, nrp+2);  cui(1:3, nrp) = cui(1:3, nrp) + cui(1:3, nrp+2)
+!                 dcur(1:2, nrp) = dcur(1:2, nrp) + dcur(1:2, nrp+2);  dcui(1:2, nrp) = dcui(1:2, nrp) + dcui(1:2, nrp+2)
+!                 amur(1:3, nrp) = amur(1:3, nrp) + amur(1:3, nrp+2);  amui(1:3, nrp) = amui(1:3, nrp) + amui(1:3, nrp+2)
+!             enddo
+!         endif
+! #endif
+        do j = 0, nrp + 1
+           ir = 1.0 / ( j + noff - 1 )
+           cu0(1:3,j)  = cu0(1:3,j)  * ir
+           dcu0(1:2,j) = dcu0(1:2,j) * ir
+           amu0(1:3,j) = amu0(1:3,j) * ir
         enddo
-    enddo
-
-    ! transform from cylindrical geometry to Cartesian geometry
-    if ( geom == p_cartesian ) then
-      ! ph_r, ph_i are temporary variables here
-      ph_r = ep(1,i) * cc - ep(2,i) * ss
-      ph_i = ep(1,i) * ss + ep(2,i) * cc
-      ep(1,i) = ph_r
-      ep(2,i) = ph_i
-
-      ph_r = bp(1,i) * cc - bp(2,i) * ss
-      ph_i = bp(1,i) * ss + bp(2,i) * cc
-      bp(1,i) = ph_r
-      bp(2,i) = ph_i
-    endif
-
-    pp = pp + 1
-  enddo
-
-end subroutine interp_emf_part2d
-
-subroutine interp_laser_part2d( ar_re, ar_im, ai_re, ai_im, ar_grad_re, ar_grad_im, &
-  ai_grad_re, ai_grad_im, max_mode, x, dr, apr, api, apr_grad, api_grad, np, ptrcur, geom )
-
-   implicit none
-
-   type(ufield), dimension(:), pointer, intent(in) :: ar_re, ar_im, ai_re, ai_im
-   type(ufield), dimension(:), pointer, intent(in) :: ar_grad_re, ar_grad_im, ai_grad_re, ai_grad_im
-   integer, intent(in) :: max_mode, np, geom
-   real, intent(in) :: dr
-   real, dimension(:,:), intent(in) :: x
-   real, dimension(:), intent(inout) :: apr, api
-   real, dimension(:,:), intent(inout) :: apr_grad, api_grad
-   integer(kind=LG), intent(in) :: ptrcur
-
-   integer :: noff, i, j, nn, m
-   integer(kind=LG) :: pp
-   real :: pos, idr, ph_r, ph_i, cc, ss
-   real, dimension(0:1) :: wt
-   complex :: phase0, phase
-
-   idr = 1.0 / dr
-   noff = ar_re(0)%get_noff(1)
-   apr = 0.0
-   api = 0.0
-   apr_grad = 0.0
-   api_grad = 0.0
-
-   pp = ptrcur
-   do i = 1, np
-      pos = sqrt( x(1,pp)**2 + x(2,pp)**2 ) * idr
-      ! cosine and sine
-      cc = x(1,pp) / pos * idr
-      ss = x(2,pp) / pos * idr
-      phase0 = cmplx( cc, ss )
-
-      ! in-cell position
-      nn  = int( pos )
-      pos = pos - real(nn)
-
-      ! cell index
-      nn = nn - noff + 1
-
-      ! get interpolation weight factor
-      call spline_linear( pos, wt )
+    
+        do mode = 1, max_mode
+          cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+          dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+          amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+          do j = 0, nrp+1
+            ir = 1.0 / ( j + noff - 1 )
+            cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
+            dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
+            amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
+          enddo
+        enddo
+    
+      endif
+      
+      call stop_tprof( 'deposit 2D particles' )
+      call write_dbg(cls_name, sname, cls_level, 'ends')
+    
+end subroutine AMJ_STD
 
-      ! interpolate m = 0 mode
-      do j = 0, 1
-        apr(i) = apr(i) + ar_re(0)%f1(1,nn+j) * wt(j)
-        api(i) = api(i) + ai_re(0)%f1(1,nn+j) * wt(j)
-        apr_grad(:,i) = apr_grad(:,i) + ar_grad_re(0)%f1(:,nn+j) * wt(j)
-        api_grad(:,i) = api_grad(:,i) + ai_grad_re(0)%f1(:,nn+j) * wt(j)
-      enddo
 
-      ! interpolate m > 0 modes
-      phase = cmplx( 1.0, 0.0 )
-      do m = 1, max_mode
-         phase = phase * phase0
-         ph_r = 2.0 * real(phase)
-         ph_i = 2.0 * aimag(phase)
-
-         do j = 0, 1
-            apr(i) = apr(i) + ( ar_re(m)%f1(1,nn+j) * ph_r - ar_im(m)%f1(1,nn+j) * ph_i ) * wt(j)
-            api(i) = api(i) + ( ai_re(m)%f1(1,nn+j) * ph_r - ai_im(m)%f1(1,nn+j) * ph_i ) * wt(j)
-            apr_grad(:,i) = apr_grad(:,i) + ( ar_grad_re(m)%f1(:,nn+j) * ph_r - ar_grad_im(m)%f1(:,nn+j) * ph_i ) * wt(j)
-            api_grad(:,i) = api_grad(:,i) + ( ai_grad_re(m)%f1(:,nn+j) * ph_r - ai_grad_im(m)%f1(:,nn+j) * ph_i ) * wt(j)
-         enddo
+subroutine AMJ_ROB(this, ef, bf, cu, amu, dcu, dt_)
+    ! deposit the current, acceleration and momentum flux
+    
+      implicit none
+    
+      class(part2d), intent(inout) :: this
+      class(field), intent(in) :: cu, amu, dcu
+      class(field), intent(in) :: ef, bf
+      real, intent(in), optional :: dt_
+      ! local data
+      character(len=32), save :: sname = 'amjdeposit_robust_part2d'
+      type(ufield), dimension(:), pointer :: ef_re => null(), ef_im => null()
+      type(ufield), dimension(:), pointer :: bf_re => null(), bf_im => null()
+      type(ufield), dimension(:), pointer :: cu_re => null(), cu_im => null()
+      type(ufield), dimension(:), pointer :: dcu_re => null(), dcu_im => null()
+      type(ufield), dimension(:), pointer :: amu_re => null(), amu_im => null()
+    
+      real, dimension(:, :), pointer :: cu0 => null(), dcu0 => null(), amu0 => null()
+      real, dimension(:, :), pointer :: cur => null(), dcur => null(), amur => null()
+      real, dimension(:, :), pointer :: cui => null(), dcui => null(), amui => null()
+    
+      integer(kind=LG) :: ptrcur, pp
+      integer :: i, j, noff, nrp, np, mode, max_mode
+      integer, dimension(p_cache_size) :: ix
+      real, dimension(p_p_dim, p_cache_size) :: bp, ep, wp, u0, u, utmp
+      real, dimension(:, :), allocatable :: weight
+      real, dimension(p_cache_size) :: pcos, psin
+      real, dimension(p_p_dim) :: du, u2
+      real :: qtmh, qtmh1, qtmh2, idt, gam, ostq, ipsi, dpsi, w, ir, dt
+      complex(kind=DB) :: phase, phase0
+    
+      call write_dbg(cls_name, sname, cls_level, 'starts')
+      call start_tprof( 'deposit 2D particles' )
+      allocate(weight(LP:UP, p_cache_size))
+    
+      ef_re  => ef%get_rf_re();  ef_im  => ef%get_rf_im()
+      bf_re  => bf%get_rf_re();  bf_im  => bf%get_rf_im()
+      cu_re  => cu%get_rf_re();  cu_im  => cu%get_rf_im()
+      dcu_re => dcu%get_rf_re(); dcu_im => dcu%get_rf_im()
+      amu_re => amu%get_rf_re(); amu_im => amu%get_rf_im()
+    
+      dt = this%dt
+      if (present(dt_)) dt = dt_
+    
+      idt = 1.0 / dt
+      qtmh = 0.5 * this%qbm * dt
+      max_mode = ef%get_max_mode()
+    
+      noff = cu_re(0)%get_noff(1)
+      nrp  = cu_re(0)%get_ndp(1)
+    
+      cu0  => cu_re(0)%get_f1()
+      dcu0 => dcu_re(0)%get_f1()
+      amu0 => amu_re(0)%get_f1()
+    
+      do ptrcur = 1, this%npp, p_cache_size
+    
+        ! check if last copy of table and set np
+        if( ptrcur + p_cache_size > this%npp ) then
+          np = this%npp - ptrcur + 1
+        else
+          np = p_cache_size
+        endif
+    
+        ! interpolate fields to particles
+        call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, ix, pcos, psin)
+        call interp_field(ef_re, ef_im, max_mode, weight, ix, pcos, psin, np, ep)
+        call interp_field(bf_re, bf_im, max_mode, weight, ix, pcos, psin, np, bp)
+    
+        ! calculate wake field
+        pp = ptrcur
+        do i = 1, np
+          wp(1, i) = ep(1, i) - bp(2, i)
+          wp(2, i) = ep(2, i) + bp(1, i)
+          wp(3, i) = ep(3, i)
+    
+          ! transform momentum from Cartesian to cylindrical coordinates
+          u0(1, i) = this%p(1, pp) * pcos(i) + this%p(2, pp) * psin(i)
+          u0(2, i) = this%p(2, pp) * pcos(i) - this%p(1, pp) * psin(i)
+          u0(3, i) = this%p(3, pp)
+    
+          ! half electric acceleration
+          gam = sqrt(1.0 + u0(1, i)**2 + u0(2, i)**2 + u0(3, i)**2)
+          qtmh1 = qtmh * gam / (gam - u0(3, i))
+          ep(:, i) = ep(:, i) * qtmh1
+          utmp(:, i) = u0(:, i) + ep(:, i)
+    
+          ! scale magnetic field
+          gam = sqrt(1.0 + utmp(1, i)**2 + utmp(2, i)**2 + utmp(3, i)**2)
+          qtmh2 = qtmh / (gam - utmp(3, i))
+          bp(:, i) = bp(:, i) * qtmh2
+    
+          ! magnetic rotation
+          u(1, i) = utmp(1, i) + utmp(2, i) * bp(3, i) - utmp(3, i) * bp(2, i)
+          u(2, i) = utmp(2, i) + utmp(3, i) * bp(1, i) - utmp(1, i) * bp(3, i)
+          u(3, i) = utmp(3, i) + utmp(1, i) * bp(2, i) - utmp(2, i) * bp(1, i)
+    
+          ostq = 2.0 / (1.0 + bp(1, i)**2 + bp(2, i)**2 + bp(3, i)**2)
+          bp(:, i) = bp(:, i) * ostq
+    
+          utmp(1, i) = utmp(1, i) + u(2, i) * bp(3, i) - u(3, i) * bp(2, i)
+          utmp(2, i) = utmp(2, i) + u(3, i) * bp(1, i) - u(1, i) * bp(3, i)
+          utmp(3, i) = utmp(3, i) + u(1, i) * bp(2, i) - u(2, i) * bp(1, i)
+    
+          ! half electric acceleration
+          u(:, i) = utmp(:, i) + ep(:, i)
+    
+          pp = pp + 1
+        enddo
+    
+        ! calculate and store time-centered values
+        ! deposit momentum flux, acceleration density, and current density
+        pp = ptrcur
+        do i = 1, np
+    
+          du(1) = idt * ( u(1,i) - u0(1,i) )
+          du(2) = idt * ( u(2,i) - u0(2,i) )
+          
+          u(:,i)  = 0.5 * ( u(:,i) + u0(:,i) )
+          this%gamma(pp) = sqrt( 1.0 + u(1,i)**2 + u(2,i)**2 + u(3,i)**2 )
+          ipsi = 1.0 / (this%gamma(pp) - u(3,i))
+          this%psi(pp) = (1.0 - 1.0 / ipsi) / this%qbm
+    
+          dpsi = this%qbm * (wp(3, i) - (wp(1, i) * u(1, i) + wp(2, i) * u(2, i)) * ipsi)
+    
+          du(1) = du(1) + u(1, i) * dpsi * ipsi
+          du(2) = du(2) + u(2, i) * dpsi * ipsi
+    
+          u2(1) = u(1, i) * u(1, i) * ipsi
+          u2(2) = u(1, i) * u(2, i) * ipsi
+          u2(3) = u(2, i) * u(2, i) * ipsi
+    
+          phase0 = cmplx( pcos(i), -psin(i) )
+          phase  = cmplx( 1.0, 0.0 ) * this%q(pp) * ipsi
+    
+          ! deposit m = 0 mode
+          do j = LP, UP
+            w = weight(j,i) * real(phase)
+            cu0( 1:3, ix(i)+j )  = cu0( 1:3, ix(i)+j )  + w * u(1:3,i)
+            dcu0( 1:2, ix(i)+j ) = dcu0( 1:2, ix(i)+j ) + w * du(1:2)
+            amu0( 1:3, ix(i)+j ) = amu0( 1:3, ix(i)+j ) + w * u2(1:3)
+          enddo
+    
+          ! deposit m > 0 mode
+          do mode = 1, max_mode
+    
+            cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+            dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+            amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+            phase = phase * phase0
+    
+            do j = LP, UP
+              w = weight(j,i) * real(phase)
+              cur( 1:3, ix(i)+j )  = cur( 1:3, ix(i)+j )  + w * u(1:3,i)
+              dcur( 1:2, ix(i)+j ) = dcur( 1:2, ix(i)+j ) + w * du(1:2)
+              amur( 1:3, ix(i)+j ) = amur( 1:3, ix(i)+j ) + w * u2(1:3)
+    
+              w = weight(j,i) * aimag(phase)
+              cui( 1:3, ix(i)+j )  = cui( 1:3, ix(i)+j )  + w * u(1:3,i)
+              dcui( 1:2, ix(i)+j ) = dcui( 1:2, ix(i)+j ) + w * du(1:2)
+              amui( 1:3, ix(i)+j ) = amui( 1:3, ix(i)+j ) + w * u2(1:3)
+            enddo
+    
+          enddo
+    
+          pp = pp + 1
+        enddo
+    
       enddo
-
-      ! transform from cylindrical geometry to Cartesian geometry
-      if ( geom == p_cartesian ) then
-        ! ph_r, ph_i are temporary variables here
-        ph_r = apr_grad(1,i) * cc - apr_grad(2,i) * ss
-        ph_i = apr_grad(1,i) * ss + apr_grad(2,i) * cc
-        apr_grad(1,i) = ph_r
-        apr_grad(2,i) = ph_i
-
-        ph_r = api_grad(1,i) * cc - api_grad(2,i) * ss
-        ph_i = api_grad(1,i) * ss + api_grad(2,i) * cc
-        api_grad(1,i) = ph_r
-        api_grad(2,i) = ph_i
+    
+      if ( id_proc_loc() ==  0 ) then
+! Correct the used values on axis by adding the value on the guard cell 0
+#ifdef QUADRATIC
+        cu0(3,1) = cu0(3,1) + cu0(3,0)
+#endif
+        ! guard cells on the axis are useless
+        cu0(1:3,0)  = 0.0
+        dcu0(1:2,0) = 0.0
+        amu0(1:3,0) = 0.0
+    
+        cu0(1:2,1)  = 0.0; cu0(3,1) = 8.0 * cu0(3,1)
+        dcu0(1:2,1) = 0.0
+        amu0(1:3,1) = 0.0
+    
+        do mode = 1, max_mode
+    
+          cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+          dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+          amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+          ! guard cells on the axis are useless
+          cur(1:3,0)  = 0.0; cui(1:3,0)  = 0.0
+          dcur(1:2,0) = 0.0; dcui(1:2,0) = 0.0
+          amur(1:3,0) = 0.0; amui(1:3,0) = 0.0
+           
+          if ( mode == 1 ) then
+#ifdef QUADRATIC
+            cur(1:2,1) = cur(1:2,1) + cur(1:2,0)
+            dcur(1:2,1) = dcur(1:2,1) + dcur(1:2,0)
+            cui(1:2,1) = cui(1:2,1) + cui(1:2,0)
+            dcui(1:2,1) = dcui(1:2,1) + dcui(1:2,0)
+  #endif
+            cur(1:2,1)  = 8.0 * cur(1:2,1); cur(3,1) = 0.0
+            dcur(1:2,1) = 8.0 * dcur(1:2,1)
+            amur(1:3,1) = 0.0
+            cui(1:2,1)  = 8.0 * cui(1:2,1); cui(3,1) = 0.0
+            dcui(1:2,1) = 8.0 * dcui(1:2,1)
+            amui(1:3,1) = 0.0
+          elseif ( mode == 2 ) then
+#ifdef QUADRATIC
+            amur(1:3,1) = amur(1:3,1) + amur(1:3,0)
+            amui(1:3,1) = amui(1:3,1) + amui(1:3,0)
+#endif
+            cur(1:3,1)  = 0.0
+            dcur(1:2,1) = 0.0
+            amur(1:3,1) = 8.0 * amur(1:3,1)
+            cui(1:3,1)  = 0.0
+            dcui(1:2,1) = 0.0
+            amui(1:3,1) = 8.0 * amui(1:3,1)
+          else
+            cur(1:3,1)  = 0.0
+            dcur(1:2,1) = 0.0
+            amur(1:3,1) = 0.0
+            cui(1:3,1)  = 0.0
+            dcui(1:2,1) = 0.0
+            amui(1:3,1) = 0.0
+           endif
+        enddo
+    
+        do j = 2, nrp + 1
+          ir = 1.0 / ( j + noff - 1 )
+          cu0(1:3,j)  = cu0(1:3,j)  * ir
+          dcu0(1:2,j) = dcu0(1:2,j) * ir
+          amu0(1:3,j) = amu0(1:3,j) * ir
+        enddo
+    
+        do mode = 1, max_mode
+    
+          cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+          dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+          amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+          do j = 2, nrp + 1
+            ir = 1.0 / ( j + noff - 1 )
+            cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
+            dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
+            amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
+          enddo
+        enddo
+    
+      else
+! #ifdef QUADRATIC
+!         if (id_proc_loc() == num_procs_loc() - 1) then
+!         ! Correct the values on the boundary by adding the value on the guard cell nrp+2
+!             cu0(1:3, nrp) = cu0(1:3, nrp) + cu0(1:3, nrp+2)
+!             dcu0(1:2, nrp) = dcu0(1:2, nrp) + dcu0(1:2, nrp+2)
+!             amu0(1:3, nrp) = amu0(1:3, nrp) + amu0(1:3, nrp+2)
+            
+!             do mode = 1, max_mode
+!                 cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+!                 dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+!                 amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+
+!                 cur(1:3, nrp) = cur(1:3, nrp) + cur(1:3, nrp+2);  cui(1:3, nrp) = cui(1:3, nrp) + cui(1:3, nrp+2)
+!                 dcur(1:2, nrp) = dcur(1:2, nrp) + dcur(1:2, nrp+2);  dcui(1:2, nrp) = dcui(1:2, nrp) + dcui(1:2, nrp+2)
+!                 amur(1:3, nrp) = amur(1:3, nrp) + amur(1:3, nrp+2);  amui(1:3, nrp) = amui(1:3, nrp) + amui(1:3, nrp+2)
+!             enddo
+!         endif
+! #endif
+        do j = 0, nrp + 1
+           ir = 1.0 / ( j + noff - 1 )
+           cu0(1:3,j)  = cu0(1:3,j)  * ir
+           dcu0(1:2,j) = dcu0(1:2,j) * ir
+           amu0(1:3,j) = amu0(1:3,j) * ir
+        enddo
+    
+        do mode = 1, max_mode
+          cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+          dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+          amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+          do j = 0, nrp+1
+            ir = 1.0 / ( j + noff - 1 )
+            cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
+            dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
+            amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
+          enddo
+        enddo
+    
       endif
+      
+      call stop_tprof( 'deposit 2D particles' )
+      call write_dbg(cls_name, sname, cls_level, 'ends')
+    
+end subroutine AMJ_ROB
 
-      pp = pp + 1
-    enddo
-
-end subroutine interp_laser_part2d
-
-subroutine push_u_std_part2d(this, ef, bf, dt_)
-
-  implicit none
-
-  class(part2d), intent(inout) :: this
-  class(field), intent(in) :: ef, bf
-  real, intent(in), optional :: dt_
-  ! local data
-  character(len=18), save :: sname = 'push_u_std_part2d'
-  type(ufield), dimension(:), pointer :: ef_re, ef_im, bf_re, bf_im
-  integer :: i, np, max_mode, noff
-  real :: qtmh, qtmh1, qtmh2, gam, dtc, ostq, dt
-  real, dimension(0:1, p_cache_size) :: weight
-  real, dimension(p_cache_size) :: pcos, psin
-  integer, dimension(p_cache_size) :: idx
-  real, dimension(p_p_dim, p_cache_size) :: bp, ep, utmp
-  integer(kind=LG) :: ptrcur, pp
-
-  call write_dbg(cls_name, sname, cls_level, 'starts')
-  call start_tprof( 'push 2D particles' )
-
-  dt = this%dt
-  if (present(dt_)) dt = dt_
-  qtmh = this%qbm * dt * 0.5
-  max_mode = ef%get_max_mode()
-
-  ef_re => ef%get_rf_re()
-  ef_im => ef%get_rf_im()
-  bf_re => bf%get_rf_re()
-  bf_im => bf%get_rf_im()
-  noff = ef_re(0)%get_noff(1)
-
-  do ptrcur = 1, this%npp, p_cache_size
-
-    ! check if last copy of table and set np
-    if( ptrcur + p_cache_size > this%npp ) then
-      np = this%npp - ptrcur + 1
-    else
-      np = p_cache_size
-    endif
-
-    ! interpolate fields to particles
-    call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, idx, pcos, psin)
-    call interp_field(ef_re, ef_im, max_mode, weight, idx, pcos, psin, np, ep)
-    call interp_field(bf_re, bf_im, max_mode, weight, idx, pcos, psin, np, bp)
-    call transform_to_cartesian(ep, np, pcos, psin)
-    call transform_to_cartesian(bp, np, pcos, psin)
-
-    pp = ptrcur
-    do i = 1, np
-
-      ! normalize E and B fields
-      qtmh1 = qtmh / (1.0 - this%qbm * this%psi(pp))
-      qtmh2 = qtmh1 * this%gamma(pp)
-      ep(:, i) = ep(:, i) * qtmh2
-      bp(:, i) = bp(:, i) * qtmh1
-
-      ! first half of electric field acceleration
-      utmp(:, i) = this%p(:, pp) + ep(:, i)
-
-      ! rotation about magnetic field
-      this%p(1, pp) = utmp(1, i) + utmp(2, i) * bp(3, i) - utmp(3, i) * bp(2, i)
-      this%p(2, pp) = utmp(2, i) + utmp(3, i) * bp(1, i) - utmp(1, i) * bp(3, i)
-      this%p(3, pp) = utmp(3, i) + utmp(1, i) * bp(2, i) - utmp(2, i) * bp(1, i)
-
-      ostq = 2.0 / ( 1.0 + bp(1, i)**2 + bp(2, i)**2 + bp(3, i)**2 )
-      bp(1, i) = bp(1, i) * ostq
-      bp(2, i) = bp(2, i) * ostq
-      bp(3, i) = bp(3, i) * ostq
-
-      utmp(1, i) = utmp(1, i) + this%p(2, pp) * bp(3, i) - this%p(3, pp) * bp(2, i)
-      utmp(2, i) = utmp(2, i) + this%p(3, pp) * bp(1, i) - this%p(1, pp) * bp(3, i)
-      utmp(3, i) = utmp(3, i) + this%p(1, pp) * bp(2, i) - this%p(2, pp) * bp(1, i)
-
-      ! second half of electric field acc.
-      this%p(:, pp) = utmp(:, i) + ep(:, i)
-
-      ! calculate time-centered gamma for push of position
-      this%gamma(pp) = sqrt(1.0 + this%p(1, pp)**2 + this%p(2, pp)**2 + this%p(3, pp)**2)
-
-      pp = pp + 1
-    enddo
-  enddo
-
-  call stop_tprof( 'push 2D particles' )
-  call write_dbg(cls_name, sname, cls_level, 'ends')
-
-end subroutine push_u_std_part2d
-
-subroutine push_u_robust_part2d(this, ef, bf, dt_)
-
-  implicit none
-
-  class(part2d), intent(inout) :: this
-  class(field), intent(in) :: ef, bf
-  real, intent(in), optional :: dt_
-  ! local data
-  character(len=18), save :: sname = 'push_u_robust_part2d'
-  type(ufield), dimension(:), pointer :: ef_re, ef_im, bf_re, bf_im
-
-  integer :: i, np, max_mode, noff
-  real :: qtmh, qtmh1, qtmh2, gam, dtc, ostq, dt
-  real, dimension(p_p_dim, p_cache_size) :: bp, ep, utmp
-  real, dimension(0:1, p_cache_size) :: weight
-  real, dimension(p_cache_size) :: pcos, psin
-  integer, dimension(p_cache_size) :: idx
-  integer(kind=LG) :: ptrcur, pp
-
-  call write_dbg(cls_name, sname, cls_level, 'starts')
-  call start_tprof( 'push 2D particles' )
-
-  dt = this%dt
-  if (present(dt_)) dt = dt_
-  qtmh = this%qbm * dt * 0.5
-  max_mode = ef%get_max_mode()
-
-  ef_re => ef%get_rf_re()
-  ef_im => ef%get_rf_im()
-  bf_re => bf%get_rf_re()
-  bf_im => bf%get_rf_im()
-  noff = ef_re(0)%get_noff(1)
-
-  do ptrcur = 1, this%npp, p_cache_size
-
-    ! check if last copy of table and set np
-    if( ptrcur + p_cache_size > this%npp ) then
-      np = this%npp - ptrcur + 1
-    else
-      np = p_cache_size
-    endif
-
-    ! interpolate fields to particles
-    call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, idx, pcos, psin)
-    call interp_field(ef_re, ef_im, max_mode, weight, idx, pcos, psin, np, ep)
-    call interp_field(bf_re, bf_im, max_mode, weight, idx, pcos, psin, np, bp)
-    call transform_to_cartesian(ep, np, pcos, psin)
-    call transform_to_cartesian(bp, np, pcos, psin)
-
-    pp = ptrcur
-    do i = 1, np
-      gam = sqrt( 1.0 + this%p(1, pp)**2 + this%p(2, pp)**2 + this%p(3, pp)**2 )
-      qtmh1 = qtmh / (gam - this%p(3, pp))
-      qtmh2 = qtmh1 * gam
-      ep(:, i) = ep(:, i) * qtmh2
-      bp(:, i) = bp(:, i) * qtmh1
-
-      ! first half of electric field acceleration
-      utmp(:, i) = this%p(:, pp) + ep(:, i)
-
-      ! rotation about magnetic field
-      this%p(1, pp) = utmp(1, i) + utmp(2, i) * bp(3, i) - utmp(3, i) * bp(2, i)
-      this%p(2, pp) = utmp(2, i) + utmp(3, i) * bp(1, i) - utmp(1, i) * bp(3, i)
-      this%p(3, pp) = utmp(3, i) + utmp(1, i) * bp(2, i) - utmp(2, i) * bp(1, i)
-
-      ostq = 2.0 / ( 1.0 + bp(1, i)**2 + bp(2, i)**2 + bp(3, i)**2 )
-      bp(:, i) = bp(:, i) * ostq
-
-      utmp(1, i) = utmp(1, i) + this%p(2, pp) * bp(3, i) - this%p(3, pp) * bp(2, i)
-      utmp(2, i) = utmp(2, i) + this%p(3, pp) * bp(1, i) - this%p(1, pp) * bp(3, i)
-      utmp(3, i) = utmp(3, i) + this%p(1, pp) * bp(2, i) - this%p(2, pp) * bp(1, i)
-
-      ! second half of electric field acc.
-      this%p(:, pp) = utmp(:, i) + ep(:, i)
-
-      ! calculate time-centered gamma for push of position
-      this%gamma(pp) = sqrt(1.0 + this%p(1, pp)**2 + this%p(2, pp)**2 + this%p(3, pp)**2)
-
-      pp = pp + 1
-    enddo
-
-  enddo
-
-  call stop_tprof( 'push 2D particles' )
-  call write_dbg(cls_name, sname, cls_level, 'ends')
-
-end subroutine push_u_robust_part2d
-
-subroutine push_u_robust_pgc_part2d(this, ef, bf, af, dt_)
-
-  implicit none
-
-  class(part2d), intent(inout) :: this
-  class(field), intent(in) :: ef, bf
-  class(field_laser), intent(in) :: af
-  real, intent(in), optional :: dt_
-  ! local data
-  character(len=18), save :: sname = 'push_u_robust_pgc_part2d'
-  type(ufield), dimension(:), pointer :: ef_re, ef_im, bf_re, bf_im
-  type(ufield), dimension(:), pointer :: ar_re, ar_im, ai_re, ai_im
-  type(ufield), dimension(:), pointer :: ar_grad_re, ar_grad_im, ai_grad_re, ai_grad_im
-
-  integer :: i, np, max_mode, noff
-  real :: qtmh, qtmh_e, qtmh_b, gam, dtc, ostq, gam_corr, tmp, qbm2_hf, dt
-  real, dimension(0:1, p_cache_size) :: weight
-  real, dimension(p_cache_size) :: pcos, psin
-  integer, dimension(p_cache_size) :: idx
-  real, dimension(p_p_dim, p_cache_size) :: bp, ep
-  real, dimension(p_cache_size) :: apr, api
-  real, dimension(3,p_cache_size) :: apr_grad, api_grad
-  real, dimension(p_p_dim) :: utmp
-  integer(kind=LG) :: ptrcur, pp
-
-  call write_dbg(cls_name, sname, cls_level, 'starts')
-  call start_tprof( 'push 2D particles' )
-
-  dt = this%dt
-  if (present(dt_)) dt = dt_
-
-  qtmh = this%qbm * dt * 0.5
-  qbm2_hf = this%qbm**2 * 0.5
-  max_mode = ef%get_max_mode()
-
-  ef_re => ef%get_rf_re()
-  ef_im => ef%get_rf_im()
-  bf_re => bf%get_rf_re()
-  bf_im => bf%get_rf_im()
-
-  ar_re => af%get_cfr_re()
-  ar_im => af%get_cfr_im()
-  ai_re => af%get_cfi_re()
-  ai_im => af%get_cfi_im()
-
-  ar_grad_re => af%ar_grad_re
-  ar_grad_im => af%ar_grad_im
-  ai_grad_re => af%ai_grad_re
-  ai_grad_im => af%ai_grad_im
-
-  noff = ef_re(0)%get_noff(1)
-  do ptrcur = 1, this%npp, p_cache_size
-
-    ! check if last copy of table and set np
-    if( ptrcur + p_cache_size > this%npp ) then
-      np = this%npp - ptrcur + 1
-    else
-      np = p_cache_size
-    endif
-
-    ! interpolate fields to particles
-    call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, idx, pcos, psin)
-    call interp_field(ef_re, ef_im, max_mode, weight, idx, pcos, psin, np, ep)
-    call interp_field(bf_re, bf_im, max_mode, weight, idx, pcos, psin, np, bp)
-    call interp_field(ar_re, ar_im, max_mode, weight, idx, pcos, psin, np, apr)
-    call interp_field(ai_re, ai_im, max_mode, weight, idx, pcos, psin, np, api)
-    call interp_field(ar_grad_re, ar_grad_im, max_mode, weight, idx, pcos, psin, np, apr_grad)
-    call interp_field(ai_grad_re, ai_grad_im, max_mode, weight, idx, pcos, psin, np, api_grad)
-    call transform_to_cartesian(ep, np, pcos, psin)
-    call transform_to_cartesian(bp, np, pcos, psin)
-    call transform_to_cartesian(apr_grad, np, pcos, psin)
-    call transform_to_cartesian(api_grad, np, pcos, psin)
-
-    ! note that the gamma and momenta calculated in this subroutine are the ones after averaging.
-    pp = ptrcur
-    do i = 1, np
-
-      ! calculate the correction factor for gamma due to ponderomotive force
-      gam_corr = qbm2_hf * ( apr(i)**2 + api(i)**2 )
-
-      ! calculate the effective electric fields
-      ! note that this%gamma is already time-centered
-      tmp = 0.5 * this%qbm / this%gamma(pp)
-      ep(1,i) = ep(1,i) - tmp * ( apr(i) * apr_grad(1,i) + api(i) * api_grad(1,i) )
-      ep(2,i) = ep(2,i) - tmp * ( apr(i) * apr_grad(2,i) + api(i) * api_grad(2,i) )
-      ep(3,i) = ep(3,i) + tmp * ( apr(i) * apr_grad(3,i) + api(i) * api_grad(3,i) )
-
-      ! half acceleration due to effective electric field
-      ! note that this%psi (1 - (q/m)*psi) is already time-centered
-      qtmh_b = qtmh / (1.0 - this%qbm * this%psi(pp))
-      qtmh_e = qtmh_b * this%gamma(pp)
-      ep(:,i) = ep(:,i) * qtmh_e
-      utmp(:) = this%p(:,pp) + ep(:,i)
-
-      ! rotation about magnetic field
-      bp(:,i) = bp(:,i) * qtmh_b
-      this%p(1,pp) = utmp(1) + utmp(2) * bp(3,i) - utmp(3) * bp(2,i)
-      this%p(2,pp) = utmp(2) + utmp(3) * bp(1,i) - utmp(1) * bp(3,i)
-      this%p(3,pp) = utmp(3) + utmp(1) * bp(2,i) - utmp(2) * bp(1,i)
-
-      ostq = 2.0 / ( 1.0 + bp(1,i)**2 + bp(2,i)**2 + bp(3,i)**2 )
-      bp(1,i) = bp(1,i) * ostq
-      bp(2,i) = bp(2,i) * ostq
-      bp(3,i) = bp(3,i) * ostq
-
-      utmp(1) = utmp(1) + this%p(2,pp) * bp(3,i) - this%p(3,pp) * bp(2,i)
-      utmp(2) = utmp(2) + this%p(3,pp) * bp(1,i) - this%p(1,pp) * bp(3,i)
-      utmp(3) = utmp(3) + this%p(1,pp) * bp(2,i) - this%p(2,pp) * bp(1,i)
-
-      ! half acceleration due to effective electric field
-      this%p(:,pp) = utmp(:) + ep(:,i)
-
-      ! calculate time-centered gamma for push of position
-      tmp = apr_grad(3, i) * dt; gam_corr = gam_corr + qbm2_hf * (apr(i) + 0.25 * tmp) * tmp
-      tmp = api_grad(3, i) * dt; gam_corr = gam_corr + qbm2_hf * (api(i) + 0.25 * tmp) * tmp
-      this%gamma(pp) = sqrt(1.0 + this%p(1, pp)**2 + this%p(2, pp)**2 + this%p(3, pp)**2 + gam_corr)
-
-      pp = pp + 1
-    enddo
-
-  enddo
-
-  call stop_tprof( 'push 2D particles' )
-  call write_dbg(cls_name, sname, cls_level, 'ends')
-
-end subroutine push_u_robust_pgc_part2d
-
-subroutine push_u_std_pgc_part2d(this, ef, bf, af, dt_)
-
-  implicit none
-
-  class(part2d), intent(inout) :: this
-  class(field), intent(in) :: ef, bf
-  class(field_laser), intent(in) :: af
-  real, intent(in), optional :: dt_
-  ! local data
-  character(len=18), save :: sname = 'push_u_std_pgc_part2d'
-  type(ufield), dimension(:), pointer :: ef_re, ef_im, bf_re, bf_im
-  type(ufield), dimension(:), pointer :: ar_re, ar_im, ai_re, ai_im
-  type(ufield), dimension(:), pointer :: ar_grad_re, ar_grad_im, ai_grad_re, ai_grad_im
-  integer :: i, np, max_mode, noff
-  real :: qtmh, qtmh_e, qtmh_b, gam, dtc, ostq, gam_corr, tmp, dt, qbm2_hf
-  real, dimension(0:1, p_cache_size) :: weight
-  real, dimension(p_cache_size) :: pcos, psin
-  integer, dimension(p_cache_size) :: idx
-  real, dimension(p_p_dim, p_cache_size) :: bp, ep
-  real, dimension(p_cache_size) :: apr, api
-  real, dimension(3,p_cache_size) :: apr_grad, api_grad
-  real, dimension(p_p_dim) :: utmp
-  integer(kind=LG) :: ptrcur, pp
-
-  call write_dbg(cls_name, sname, cls_level, 'starts')
-  call start_tprof( 'push 2D particles' )
-
-  dt = this%dt
-  if (present(dt_)) dt = dt_
-
-  qtmh = this%qbm * this%dt * 0.5
-  qbm2_hf = this%qbm**2 * 0.5
-  max_mode = ef%get_max_mode()
-
-  ef_re => ef%get_rf_re()
-  ef_im => ef%get_rf_im()
-  bf_re => bf%get_rf_re()
-  bf_im => bf%get_rf_im()
-
-  ar_re => af%get_cfr_re()
-  ar_im => af%get_cfr_im()
-  ai_re => af%get_cfi_re()
-  ai_im => af%get_cfi_im()
-
-  ar_grad_re => af%ar_grad_re
-  ar_grad_im => af%ar_grad_im
-  ai_grad_re => af%ai_grad_re
-  ai_grad_im => af%ai_grad_im
-
-  noff = ef_re(0)%get_noff(1)
-  do ptrcur = 1, this%npp, p_cache_size
-
-    ! check if last copy of table and set np
-    if( ptrcur + p_cache_size > this%npp ) then
-      np = this%npp - ptrcur + 1
-    else
-      np = p_cache_size
-    endif
-
-    ! interpolate fields to particles
-    call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, idx, pcos, psin)
-    call interp_field(ef_re, ef_im, max_mode, weight, idx, pcos, psin, np, ep)
-    call interp_field(bf_re, bf_im, max_mode, weight, idx, pcos, psin, np, bp)
-    call interp_field(ar_re, ar_im, max_mode, weight, idx, pcos, psin, np, apr)
-    call interp_field(ai_re, ai_im, max_mode, weight, idx, pcos, psin, np, api)
-    call interp_field(ar_grad_re, ar_grad_im, max_mode, weight, idx, pcos, psin, np, apr_grad)
-    call interp_field(ai_grad_re, ai_grad_im, max_mode, weight, idx, pcos, psin, np, api_grad)
-    call transform_to_cartesian(ep, np, pcos, psin)
-    call transform_to_cartesian(bp, np, pcos, psin)
-    call transform_to_cartesian(apr_grad, np, pcos, psin)
-    call transform_to_cartesian(api_grad, np, pcos, psin)
-
-    ! note that the gamma and momenta calculated in this subroutine are the ones after averaging.
-    pp = ptrcur
-    do i = 1, np
-
-      ! calculate the correction factor for gamma due to ponderomotive force
-      gam_corr = qbm2_hf * (apr(i)**2 + api(i)**2)
-
-      ! calculate the effective electric fields
-      ! note that this%gamma is already time-centered
-      tmp = 0.5 * this%qbm / this%gamma(pp)
-      ep(1, i) = ep(1, i) - tmp * ( apr(i) * apr_grad(1, i) + api(i) * api_grad(1, i) )
-      ep(2, i) = ep(2, i) - tmp * ( apr(i) * apr_grad(2, i) + api(i) * api_grad(2, i) )
-      ep(3, i) = ep(3, i) + tmp * ( apr(i) * apr_grad(3, i) + api(i) * api_grad(3, i) )
-
-      ! normalize the fields
-      qtmh_b = qtmh / (1.0 - this%qbm * this%psi(pp))
-      qtmh_e = qtmh_b * this%gamma(pp)
-      ep(:, i) = ep(:, i) * qtmh_e
-      bp(:, i) = bp(:, i) * qtmh_b
-
-      ! half acceleration due to effective electric field
-      utmp(:) = this%p(:, pp) + ep(:, i)
-
-      ! rotation about magnetic field
-      this%p(1, pp) = utmp(1) + utmp(2) * bp(3, i) - utmp(3) * bp(2, i)
-      this%p(2, pp) = utmp(2) + utmp(3) * bp(1, i) - utmp(1) * bp(3, i)
-      this%p(3, pp) = utmp(3) + utmp(1) * bp(2, i) - utmp(2) * bp(1, i)
-
-      ostq = 2.0 / (1.0 + bp(1, i)**2 + bp(2, i)**2 + bp(3, i)**2)
-      bp(1, i) = bp(1, i) * ostq
-      bp(2, i) = bp(2, i) * ostq
-      bp(3, i) = bp(3, i) * ostq
-
-      utmp(1) = utmp(1) + this%p(2, pp) * bp(3, i) - this%p(3, pp) * bp(2, i)
-      utmp(2) = utmp(2) + this%p(3, pp) * bp(1, i) - this%p(1, pp) * bp(3, i)
-      utmp(3) = utmp(3) + this%p(1, pp) * bp(2, i) - this%p(2, pp) * bp(1, i)
-
-      ! half acceleration due to effective electric field
-      this%p(:, pp) = utmp(:) + ep(:, i)
-
-      ! calculate time-centered gamma for push of position
-      tmp = apr_grad(3, i) * dt; gam_corr = gam_corr + qbm2_hf * (apr(i) + 0.25 * tmp) * tmp
-      tmp = api_grad(3, i) * dt; gam_corr = gam_corr + qbm2_hf * (api(i) + 0.25 * tmp) * tmp
-      this%gamma(pp) = sqrt(1.0 + this%p(1, pp)**2 + this%p(2, pp)**2 + this%p(3, pp)**2 + gam_corr)
-
-      pp = pp + 1
-    enddo
-
-  enddo
-
-  call stop_tprof( 'push 2D particles' )
-  call write_dbg(cls_name, sname, cls_level, 'ends')
-
-end subroutine push_u_std_pgc_part2d
-
-subroutine push_x_part2d(this, dt_)
-
-  implicit none
-
-  class(part2d), intent(inout) :: this
-  real, intent(in), optional :: dt_
-  ! local data
-  character(len=18), save :: sname = 'push_x_part2d'
-
-  integer :: i, np
-  real :: gam, dtc, dt
-  integer(kind=LG) :: ptrcur, pp
-
-  call write_dbg(cls_name, sname, cls_level, 'starts')
-  call start_tprof( 'push 2D particles' )
-
-  dt = this%dt
-  if (present(dt_)) dt = dt_
-
-  do ptrcur = 1, this%npp, p_cache_size
-
-    ! check if last copy of table and set np
-    if( ptrcur + p_cache_size > this%npp ) then
-      np = this%npp - ptrcur + 1
-    else
-      np = p_cache_size
-    endif
-
-    pp = ptrcur
-    do i = 1, np
-      dtc = dt / (this%gamma(pp) - this%p(3, pp))
-      this%x(1, pp) = this%x(1, pp) + this%p(1, pp) * dtc
-      this%x(2, pp) = this%x(2, pp) + this%p(2, pp) * dtc
-      pp = pp + 1
-    enddo
-
-  enddo
-
-  call stop_tprof( 'push 2D particles' )
-  call write_dbg(cls_name, sname, cls_level, 'ends')
-
-end subroutine push_x_part2d
-
-subroutine interp_psi_part2d(this, psi_re, psi_im, max_mode)
-
-  implicit none
-  class(part2d), intent(inout) :: this
-  type(ufield), dimension(:), pointer, intent(in) :: psi_re, psi_im
-  integer, intent(in) :: max_mode
-
-  ! real, dimension(:, :), pointer :: psi_re_ptr, psi_im_ptr
-  integer :: noff, i, np
-  integer(kind=LG) :: pp, ptrcur
-  ! real :: pos, idr, ph_r, ph_i, cc, ss
-  ! real, dimension(0:1) :: wt
-  ! complex(kind=DB) :: phase0, phase
-
-  real, dimension(0:1, p_cache_size) :: weight
-  real, dimension(p_cache_size) :: pcos, psin, psip
-  integer, dimension(p_cache_size) :: idx
-
-  ! idr = 1.0 / this%dr
-  noff = psi_re(0)%get_noff(1)
-  ! psi_re_ptr => psi_re(0)%get_f1()
-
-  do ptrcur = 1, this%npp, p_cache_size
-
-    ! check if last copy of table and set np
-    if( ptrcur + p_cache_size > this%npp ) then
-      np = this%npp - ptrcur + 1
-    else
-      np = p_cache_size
-    endif
-
-    call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, idx, pcos, psin)
-    call interp_field(psi_re, psi_im, max_mode, weight, idx, pcos, psin, np, psip)
-
-    pp = ptrcur
-    do i = 1, np
-      this%psi(pp) = psip(i)
-    enddo
-
-  enddo
-
-end subroutine interp_psi_part2d
-
-subroutine update_bound_part2d( this )
-
-   implicit none
-
-   class(part2d), intent(inout) :: this
-   ! local
-   integer(kind=LG) :: i
-   real :: pos
-   character(len=32), save :: sname = "update_bound_part2d"
-
-   call write_dbg(cls_name, sname, cls_level, 'starts')
-
-   if ( this%npp == 0 ) return
-
-   call start_tprof( 'push 2D particles' )
-
-   i = 1
-
-   do while ( i < this%npp )
-
-      pos = sqrt( this%x(1,i)**2 + this%x(2,i)**2 )
-
-      ! check if particle goes out of the physical edge
-      if ( pos >= this%edge ) then
-         this%x(:,i)   = this%x(:, this%npp)
-         this%p(:,i)   = this%p(:, this%npp)
-         this%gamma(i) = this%gamma(this%npp)
-         this%psi(i)   = this%psi(this%npp)
-         this%q(i)     = this%q(this%npp)
-         this%npp = this%npp - 1
-         cycle
-      endif
-
-      i = i + 1
-   enddo
-
-   ! deal with the last particle
-   pos = sqrt( this%x(1,this%npp)**2 + this%x(2,this%npp)**2 )
-
-   if ( pos >= this%edge ) then
-      this%npp = this%npp - 1
-   endif
-
-   call stop_tprof( 'push 2D particles' )
-   call write_dbg(cls_name, sname, cls_level, 'ends')
-
-end subroutine update_bound_part2d
-
-subroutine pipesend_part2d(this, tag, id)
-
-  implicit none
-
-  class(part2d), intent(inout) :: this
-  integer, intent(in) :: tag
-  integer, intent(inout) :: id
-
-  ! local data
-  character(len=18), save :: sname = 'pipesend_part2d'
-  integer :: des, ierr, i, stride
-
-  call write_dbg(cls_name, sname, cls_level, 'starts')
-
-  des = id_proc() + num_procs_loc()
-
-  if ( des >= num_procs() ) then
-    id = MPI_REQUEST_NULL
-    call write_dbg(cls_name, sname, cls_level, 'ends')
-    return
-  endif
-
-  do i = 1, this%npp
-
-    stride = (i-1) * this%part_dim
-
-    this%pbuf( 1 + stride ) = this%x(1,i)
-    this%pbuf( 2 + stride ) = this%x(2,i)
-    this%pbuf( 3 + stride ) = this%p(1,i)
-    this%pbuf( 4 + stride ) = this%p(2,i)
-    this%pbuf( 5 + stride ) = this%p(3,i)
-    this%pbuf( 6 + stride ) = this%gamma(i)
-    this%pbuf( 7 + stride ) = this%psi(i)
-    this%pbuf( 8 + stride ) = this%q(i)
-
-  enddo
-
-  ! NOTE: npp*xdim might be larger than MAX_INT32
-  call mpi_isend( this%pbuf, int(this%npp * this%part_dim), p_dtype_real, des, tag, &
-    comm_world(), id, ierr )
-
-  ! check for errors
-  if (ierr /= 0) then
-    call write_err('MPI_ISEND failed')
-  endif
-
-  call write_dbg(cls_name, sname, cls_level, 'ends')
-
-end subroutine pipesend_part2d
-
-subroutine piperecv_part2d(this, tag)
-
-  implicit none
-
-  class(part2d), intent(inout) :: this
-  integer, intent(in) :: tag
-  ! local data
-  character(len=18), save :: sname = 'piperecv_part2d'
-  integer, dimension(MPI_STATUS_SIZE) :: stat
-  integer :: recv_cnt, src, ierr, i, stride
-  real :: ratio
-
-  call write_dbg(cls_name, sname, cls_level, 'starts')
-
-  if ( .not. allocated(recv_buf) ) then
-    allocate( recv_buf( this%part_dim * recv_buf_size ) )
-  endif
-
-  src = id_proc() - num_procs_loc()
-
-  if (src < 0) then
-    call write_dbg(cls_name, sname, cls_level, 'ends')
-    return
-  endif
-
-  ! probe for incoming message
-  call mpi_probe( src, tag, comm_world(), stat, ierr )
-  call mpi_get_count( stat, p_dtype_real, recv_cnt, ierr )
-  recv_cnt = recv_cnt / this%part_dim
-
-  ! check if the receiving buffer needs to be reallocated
-  if ( recv_cnt > recv_buf_size ) then
-    call write_stdout( '[process ' // num2str(id_proc()) // ']: Resizing 2D &
-          &particle pipeline receiving buffer!', only_root = .false. )
-    deallocate( recv_buf )
-    recv_buf_size = int( recv_cnt * p_buf_incr )
-    allocate( recv_buf( recv_buf_size * this%part_dim ) )
-  endif
-
-  ! NOTE: npp*xdim might be larger than MAX_INT32
-  call mpi_recv( recv_buf, this%part_dim * recv_buf_size, p_dtype_real, &
-    src, tag, comm_world(), MPI_STATUS_IGNORE, ierr )
-
-  ! check if the particle buffer needs to be reallocated
-  ratio = real( recv_cnt ) / this%npmax
-  if ( ratio > 1.0 ) then
-    call write_stdout( '[process ' // num2str(id_proc()) // ']: Resizing 2D &
-          &particle buffer!', only_root = .false. )
-    call this%realloc( ratio = ratio * p_buf_incr )
-  endif
-
-  this%npp = recv_cnt
-
-  do i = 1, this%npp
-
-    stride = (i-1) * this%part_dim
-
-    this%x(1,i)   = recv_buf( 1 + stride )
-    this%x(2,i)   = recv_buf( 2 + stride )
-    this%p(1,i)   = recv_buf( 3 + stride )
-    this%p(2,i)   = recv_buf( 4 + stride )
-    this%p(3,i)   = recv_buf( 5 + stride )
-    this%gamma(i) = recv_buf( 6 + stride )
-    this%psi(i)   = recv_buf( 7 + stride )
-    this%q(i)     = recv_buf( 8 + stride )
-
-  enddo
-
-  ! check for errors
-  if (ierr /= 0) then
-    call write_err('MPI failed')
-  endif
-
-  call write_dbg(cls_name, sname, cls_level, 'ends')
-
-end subroutine piperecv_part2d
-
-subroutine writehdf5_part2d(this,file)
-
-  implicit none
-
-  class(part2d), intent(inout) :: this
-  class(hdf5file), intent(in) :: file
-  ! local data
-  character(len=18), save :: sname = 'writehdf5_part2d'
-  integer :: ierr
-
-  call write_dbg(cls_name, sname, cls_level, 'starts')
-  call pwpart( file, this%x, this%p, this%q, this%npp, 1, ierr )
-  call write_dbg(cls_name, sname, cls_level, 'ends')
-
-end subroutine writehdf5_part2d
-
-subroutine sort_part2d( this, nrp, noff )
-
-  implicit none
-  class(part2d), intent(inout) :: this
-  integer, intent(in) :: nrp, noff
-
-  ! local data
-  integer :: i
-  integer, dimension(:), allocatable :: ix
-  real :: idr, pos
-  character(len=18), save :: sname = 'sort_part2d'
-
-  call write_dbg(cls_name, sname, cls_level, 'starts')
-  call start_tprof( 'sort 2D particles' )
-
-  if ( .not. allocated(sort_idx) ) allocate( sort_idx(this%npmax) )
-  if ( this%npmax > size(sort_idx) ) then
-    deallocate(sort_idx)
-    allocate( sort_idx(this%npmax) )
-  endif
-
-  allocate( ix(this%npp) )
-  ix = 0
-
-  idr = 1.0 / this%dr
-  ! calculate the grid index of particles
-  do i = 1, this%npp
-    pos = sqrt( this%x(1,i)**2 + this%x(2,i)**2 ) * idr
-    ix(i) = floor(pos) - noff + 1
-  enddo
+subroutine AMJ_STD_PGC(this, ef, bf, af, cu, amu, dcu, dt_)
+    ! deposit the current, acceleration and momentum flux
+    
+      implicit none
+    
+      class(part2d), intent(inout) :: this
+      class(field), intent(in) :: cu, amu, dcu
+      class(field), intent(in) :: ef, bf
+      class(field_laser), intent(in) :: af
+      real, intent(in), optional :: dt_
+      ! local data
+      character(len=32), save :: sname = 'amjdeposit_std_pgc_part2d'
+      type(ufield), dimension(:), pointer :: ef_re => null(), ef_im => null()
+      type(ufield), dimension(:), pointer :: bf_re => null(), bf_im => null()
+      type(ufield), dimension(:), pointer :: cu_re => null(), cu_im => null()
+      type(ufield), dimension(:), pointer :: dcu_re => null(), dcu_im => null()
+      type(ufield), dimension(:), pointer :: amu_re => null(), amu_im => null()
+      type(ufield), dimension(:), pointer :: ar_re, ar_im, ai_re, ai_im
+      type(ufield), dimension(:), pointer :: ar_grad_re, ar_grad_im, ai_grad_re, ai_grad_im
+    
+      real, dimension(:,:), pointer :: cu0 => null(), dcu0 => null(), amu0 => null()
+      real, dimension(:,:), pointer :: cur => null(), dcur => null(), amur => null()
+      real, dimension(:,:), pointer :: cui => null(), dcui => null(), amui => null()
+    
+      integer(kind=LG) :: ptrcur, pp
+      integer :: i, j, noff, nrp, np, mode, max_mode
+      integer, dimension(p_cache_size) :: ix
+      real, dimension(p_p_dim, p_cache_size) :: bp, ep, wp, u0, u
+      real, dimension(p_cache_size) :: apr, api
+      real, dimension(3,p_cache_size) :: apr_grad, api_grad
+      real, dimension(:, :), allocatable :: weight
+      real, dimension(p_cache_size) :: pcos, psin
+      real, dimension(p_p_dim) :: du, u2, utmp
+      real :: qtmh, qtmh_e, qtmh_b, idt, gam, ostq, ipsi, dpsi, w, ir, gam_corr, tmp, dt
+      complex(kind=DB) :: phase, phase0
+    
+      call write_dbg(cls_name, sname, cls_level, 'starts')
+      call start_tprof( 'deposit 2D particles' )
+      allocate(weight(LP:UP, p_cache_size))
+    
+      ef_re  => ef%get_rf_re();  ef_im  => ef%get_rf_im()
+      bf_re  => bf%get_rf_re();  bf_im  => bf%get_rf_im()
+      cu_re  => cu%get_rf_re();  cu_im  => cu%get_rf_im()
+      dcu_re => dcu%get_rf_re(); dcu_im => dcu%get_rf_im()
+      amu_re => amu%get_rf_re(); amu_im => amu%get_rf_im()
+    
+      dt = this%dt
+      if (present(dt_)) dt = dt_
+    
+      idt = 1.0 / dt
+      qtmh = 0.5 * this%qbm * dt
+      max_mode = ef%get_max_mode()
+    
+      noff = cu_re(0)%get_noff(1)
+      nrp  = cu_re(0)%get_ndp(1)
+    
+      cu0  => cu_re(0)%get_f1()
+      dcu0 => dcu_re(0)%get_f1()
+      amu0 => amu_re(0)%get_f1()
+    
+      ar_re => af%get_cfr_re()
+      ar_im => af%get_cfr_im()
+      ai_re => af%get_cfi_re()
+      ai_im => af%get_cfi_im()
+    
+      ar_grad_re => af%ar_grad_re
+      ar_grad_im => af%ar_grad_im
+      ai_grad_re => af%ai_grad_re
+      ai_grad_im => af%ai_grad_im
+    
+      do ptrcur = 1, this%npp, p_cache_size
+    
+        ! check if last copy of table and set np
+        if( ptrcur + p_cache_size > this%npp ) then
+          np = this%npp - ptrcur + 1
+        else
+          np = p_cache_size
+        endif
+    
+        ! interpolate fields to particles
+        ! call interp_emf_part2d( ef_re, ef_im, bf_re, bf_im, max_mode, this%x, this%dr, &
+        !   bp, ep, np, ptrcur, p_cylindrical, weight = wt, ix = ix, pcos = cc, psin = ss )
+        ! call interp_laser_part2d( ar_re, ar_im, ai_re, ai_im, &
+        !   ar_grad_re, ar_grad_im, ai_grad_re, ai_grad_im, &
+        !   max_mode, this%x, this%dr, apr, api, apr_grad, api_grad, np, ptrcur, p_cylindrical )
+        call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, ix, pcos, psin)
+        call interp_field(ef_re, ef_im, max_mode, weight, ix, pcos, psin, np, ep)
+        call interp_field(bf_re, bf_im, max_mode, weight, ix, pcos, psin, np, bp)
+        call interp_field(ar_re, ar_im, max_mode, weight, ix, pcos, psin, np, apr)
+        call interp_field(ai_re, ai_im, max_mode, weight, ix, pcos, psin, np, api)
+        call interp_field(ar_grad_re, ar_grad_im, max_mode, weight, ix, pcos, psin, np, apr_grad)
+        call interp_field(ai_grad_re, ai_grad_im, max_mode, weight, ix, pcos, psin, np, api_grad)
+    
+        ! advance the particle momentum
+        pp = ptrcur
+        do i = 1, np
+    
+          ! calculate the correction factor for gamma due to ponderomotive force
+          gam_corr = 0.5 * this%qbm**2 * (apr(i)**2 + api(i)**2)
+    
+          ! transform momentum from Cartesian to cylindrical coordinates
+          u0(1, i) = this%p(1, pp) * pcos(i) + this%p(2, pp) * psin(i)
+          u0(2, i) = this%p(2, pp) * pcos(i) - this%p(1, pp) * psin(i)
+          u0(3, i) = this%p(3, pp)
+    
+          ! calculate the averaged gamma factor
+          gam = sqrt(1.0 + u0(1, i)**2 + u0(2, i)**2 + u0(3, i)**2 + gam_corr)
+    
+          ! calculate wake field
+          wp(1, i) = ep(1, i) - bp(2, i)
+          wp(2, i) = ep(2, i) + bp(1, i)
+          wp(3, i) = ep(3, i)
+    
+          ! TODO: There is a strict time-centered scheme for the standard-PGC pusher, which is left for future work.
+    
+          ! calculate the effective electric fields
+          ! note that here the time-centered gamma is unknown, so we approximate it using the initial one.
+          tmp = 0.5 * this%qbm / gam
+          ep(1, i) = ep(1, i) - tmp * (apr(i) * apr_grad(1, i) + api(i) * api_grad(1, i))
+          ep(2, i) = ep(2, i) - tmp * (apr(i) * apr_grad(2, i) + api(i) * api_grad(2, i))
+          ep(3, i) = ep(3, i) + tmp * (apr(i) * apr_grad(3, i) + api(i) * api_grad(3, i))
+    
+          ! half electric acceleration
+          qtmh_b = qtmh / (1.0 - this%qbm * this%psi(pp))
+          qtmh_e = qtmh_b * gam
+          utmp(:) = u0(:, i) + ep(:, i) * qtmh_e
+    
+          ! magnetic rotation
+          bp(:, i) = bp(:, i) * qtmh_b
+          u(1, i) = utmp(1) + utmp(2) * bp(3, i) - utmp(3) * bp(2, i)
+          u(2, i) = utmp(2) + utmp(3) * bp(1, i) - utmp(1) * bp(3, i)
+          u(3, i) = utmp(3) + utmp(1) * bp(2, i) - utmp(2) * bp(1, i)
+    
+          ostq = 2.0 / (1.0 + bp(1, i)**2 + bp(2, i)**2 + bp(3, i)**2)
+          bp(:, i) = bp(:, i) * ostq
+    
+          utmp(1) = utmp(1) + u(2, i) * bp(3, i) - u(3, i) * bp(2, i)
+          utmp(2) = utmp(2) + u(3, i) * bp(1, i) - u(1, i) * bp(3, i)
+          utmp(3) = utmp(3) + u(1, i) * bp(2, i) - u(2, i) * bp(1, i)
+    
+          ! half electric acceleration
+          gam = sqrt(1.0 + utmp(1)**2 + utmp(2)**2 + utmp(3)**2 + gam_corr)
+          qtmh_e = qtmh_b * gam
+          u(:, i) = utmp(:) + ep(:, i) * qtmh_e
+    
+          ! calculate and store time-centered values
+          ! deposit momentum flux, acceleration density, and current density
+          du(1) = idt * (u(1, i) - u0(1, i))
+          du(2) = idt * (u(2, i) - u0(2, i))
+          
+          u(:, i)  = 0.5 * (u(:, i) + u0(:, i))
+          ! store the time-centered gamma
+          this%gamma(pp) = sqrt(1.0 + u(1, i)**2 + u(2, i)**2 + u(3, i)**2 + gam_corr)
+    
+          ipsi = 1.0 / (1.0 - this%qbm * this%psi(pp))
+          dpsi = this%qbm * (wp(3, i) - (wp(1, i) * u(1, i) + wp(2, i) * u(2, i)) * ipsi)
+    
+          du(1) = du(1) + u(1, i) * dpsi * ipsi
+          du(2) = du(2) + u(2, i) * dpsi * ipsi
+    
+          u2(1) = u(1, i) * u(1, i) * ipsi
+          u2(2) = u(1, i) * u(2, i) * ipsi
+          u2(3) = u(2, i) * u(2, i) * ipsi
+    
+          phase0 = cmplx(pcos(i), -psin(i))
+          phase  = cmplx(1.0, 0.0) * this%q(pp) * ipsi
+    
+          ! deposit m = 0 mode
+          do j = LP, UP
+            w = weight(j, i) * real(phase)
+            cu0(1:3, ix(i)+j)  = cu0(1:3, ix(i) + j)  + w * u(1:3, i)
+            dcu0(1:2, ix(i)+j) = dcu0(1:2, ix(i) + j) + w * du(1:2)
+            amu0(1:3, ix(i)+j) = amu0(1:3, ix(i) + j) + w * u2(1:3)
+          enddo
+    
+          ! deposit m > 0 mode
+          do mode = 1, max_mode
+    
+            cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+            dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+            amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+            phase = phase * phase0
+    
+            do j = LP, UP
+              w = weight(j,i) * real(phase)
+              cur( 1:3, ix(i)+j )  = cur( 1:3, ix(i)+j )  + w * u(1:3,i)
+              dcur( 1:2, ix(i)+j ) = dcur( 1:2, ix(i)+j ) + w * du(1:2)
+              amur( 1:3, ix(i)+j ) = amur( 1:3, ix(i)+j ) + w * u2(1:3)
+    
+              w = weight(j,i) * aimag(phase)
+              cui( 1:3, ix(i)+j )  = cui( 1:3, ix(i)+j )  + w * u(1:3,i)
+              dcui( 1:2, ix(i)+j ) = dcui( 1:2, ix(i)+j ) + w * du(1:2)
+              amui( 1:3, ix(i)+j ) = amui( 1:3, ix(i)+j ) + w * u2(1:3)
+            enddo
+    
+          enddo
+    
+          pp = pp + 1
+        enddo
+    
+      enddo
+    
+      if ( id_proc_loc() ==  0 ) then
+! Correct the used values on axis by adding the value on the guard cell 0
+#ifdef QUADRATIC
+        cu0(3,1) = cu0(3,1) + cu0(3,0)
+#endif
+        ! guard cells on the axis are useless
+        cu0(1:3,0)  = 0.0
+        dcu0(1:2,0) = 0.0
+        amu0(1:3,0) = 0.0
+    
+        cu0(1:2,1)  = 0.0; cu0(3,1) = 8.0 * cu0(3,1)
+        dcu0(1:2,1) = 0.0
+        amu0(1:3,1) = 0.0
+    
+        do mode = 1, max_mode
+    
+          cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+          dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+          amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+          ! guard cells on the axis are useless
+          cur(1:3,0)  = 0.0; cui(1:3,0)  = 0.0
+          dcur(1:2,0) = 0.0; dcui(1:2,0) = 0.0
+          amur(1:3,0) = 0.0; amui(1:3,0) = 0.0
+            
+          if ( mode == 1 ) then
+#ifdef QUADRATIC
+            cur(1:2,1) = cur(1:2,1) + cur(1:2,0)
+            dcur(1:2,1) = dcur(1:2,1) + dcur(1:2,0)
+            cui(1:2,1) = cui(1:2,1) + cui(1:2,0)
+            dcui(1:2,1) = dcui(1:2,1) + dcui(1:2,0)
+#endif
+            cur(1:2,1)  = 8.0 * cur(1:2,1); cur(3,1) = 0.0
+            dcur(1:2,1) = 8.0 * dcur(1:2,1)
+            amur(1:3,1) = 0.0
+            cui(1:2,1)  = 8.0 * cui(1:2,1); cui(3,1) = 0.0
+            dcui(1:2,1) = 8.0 * dcui(1:2,1)
+            amui(1:3,1) = 0.0
+          elseif ( mode == 2 ) then
+#ifdef QUADRATIC
+            amur(1:3,1) = amur(1:3,1) + amur(1:3,0)
+            amui(1:3,1) = amui(1:3,1) + amui(1:3,0)
+#endif
+            cur(1:3,1)  = 0.0
+            dcur(1:2,1) = 0.0
+            amur(1:3,1) = 8.0 * amur(1:3,1)
+            cui(1:3,1)  = 0.0
+            dcui(1:2,1) = 0.0
+            amui(1:3,1) = 8.0 * amui(1:3,1)
+          else
+            cur(1:3,1)  = 0.0
+            dcur(1:2,1) = 0.0
+            amur(1:3,1) = 0.0
+            cui(1:3,1)  = 0.0
+            dcui(1:2,1) = 0.0
+            amui(1:3,1) = 0.0
+            endif
+        enddo
+    
+        do j = 2, nrp + 1
+          ir = 1.0 / ( j + noff - 1 )
+          cu0(1:3,j)  = cu0(1:3,j)  * ir
+          dcu0(1:2,j) = dcu0(1:2,j) * ir
+          amu0(1:3,j) = amu0(1:3,j) * ir
+        enddo
+    
+        do mode = 1, max_mode
+    
+          cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+          dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+          amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+          do j = 2, nrp + 1
+            ir = 1.0 / ( j + noff - 1 )
+            cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
+            dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
+            amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
+          enddo
+        enddo
+    
+      else
+! #ifdef QUADRATIC
+!         if (id_proc_loc() == num_procs_loc() - 1) then
+!         ! Correct the values on the boundary by adding the value on the guard cell nrp+2
+!             cu0(1:3, nrp) = cu0(1:3, nrp) + cu0(1:3, nrp+2)
+!             dcu0(1:2, nrp) = dcu0(1:2, nrp) + dcu0(1:2, nrp+2)
+!             amu0(1:3, nrp) = amu0(1:3, nrp) + amu0(1:3, nrp+2)
+            
+!             do mode = 1, max_mode
+!                 cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+!                 dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+!                 amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+
+!                 cur(1:3, nrp) = cur(1:3, nrp) + cur(1:3, nrp+2);  cui(1:3, nrp) = cui(1:3, nrp) + cui(1:3, nrp+2)
+!                 dcur(1:2, nrp) = dcur(1:2, nrp) + dcur(1:2, nrp+2);  dcui(1:2, nrp) = dcui(1:2, nrp) + dcui(1:2, nrp+2)
+!                 amur(1:3, nrp) = amur(1:3, nrp) + amur(1:3, nrp+2);  amui(1:3, nrp) = amui(1:3, nrp) + amui(1:3, nrp+2)
+!             enddo
+!         endif
+! #endif
+        do j = 0, nrp + 1
+            ir = 1.0 / ( j + noff - 1 )
+            cu0(1:3,j)  = cu0(1:3,j)  * ir
+            dcu0(1:2,j) = dcu0(1:2,j) * ir
+            amu0(1:3,j) = amu0(1:3,j) * ir
+        enddo
+    
+        do mode = 1, max_mode
+          cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+          dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+          amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+          do j = 0, nrp+1
+            ir = 1.0 / ( j + noff - 1 )
+            cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
+            dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
+            amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
+          enddo
+        enddo
+    
+      endif
+      
+      call stop_tprof( 'deposit 2D particles' )
+      call write_dbg(cls_name, sname, cls_level, 'ends')
+      
+end subroutine AMJ_STD_PGC
 
-  ! generate the sorted indices
-  call generate_sort_idx_1d( ix, sort_idx, int(this%npp, kind=4), nrp )
+subroutine AMJ_ROB_PGC(this, ef, bf, af, cu, amu, dcu, dt_)
+    ! deposit the current, acceleration and momentum flux
+    
+      implicit none
+    
+      class(part2d), intent(inout) :: this
+      class(field), intent(in) :: cu, amu, dcu
+      class(field), intent(in) :: ef, bf
+      class(field_laser), intent(in) :: af
+      real, intent(in), optional :: dt_
+      ! local data
+      character(len=32), save :: sname = 'amjdeposit_robust_pgc_part2d'
+      type(ufield), dimension(:), pointer :: ef_re => null(), ef_im => null()
+      type(ufield), dimension(:), pointer :: bf_re => null(), bf_im => null()
+      type(ufield), dimension(:), pointer :: cu_re => null(), cu_im => null()
+      type(ufield), dimension(:), pointer :: dcu_re => null(), dcu_im => null()
+      type(ufield), dimension(:), pointer :: amu_re => null(), amu_im => null()
+      type(ufield), dimension(:), pointer :: ar_re, ar_im, ai_re, ai_im
+      type(ufield), dimension(:), pointer :: ar_grad_re, ar_grad_im, ai_grad_re, ai_grad_im
+    
+      real, dimension(:,:), pointer :: cu0 => null(), dcu0 => null(), amu0 => null()
+      real, dimension(:,:), pointer :: cur => null(), dcur => null(), amur => null()
+      real, dimension(:,:), pointer :: cui => null(), dcui => null(), amui => null()
+    
+      integer(kind=LG) :: ptrcur, pp
+      integer :: i, j, noff, nrp, np, mode, max_mode
+      integer, dimension(p_cache_size) :: ix
+      real, dimension(p_p_dim, p_cache_size) :: bp, ep, wp, u0, u
+      real, dimension(p_cache_size) :: apr, api
+      real, dimension(3,p_cache_size) :: apr_grad, api_grad
+      real, dimension(:, :), allocatable :: weight
+      real, dimension(p_cache_size) :: pcos, psin
+      real, dimension(p_p_dim) :: du, u2, utmp
+      real :: qtmh, qtmh_e, qtmh_b, idt, gam, ostq, ipsi, dpsi, w, ir, gam_corr, tmp, dt
+      complex(kind=DB) :: phase, phase0
+    
+      call write_dbg(cls_name, sname, cls_level, 'starts')
+      call start_tprof( 'deposit 2D particles' )
+      
+      allocate(weight(LP:UP, p_cache_size))
+      
+      ef_re  => ef%get_rf_re();  ef_im  => ef%get_rf_im()
+      bf_re  => bf%get_rf_re();  bf_im  => bf%get_rf_im()
+      cu_re  => cu%get_rf_re();  cu_im  => cu%get_rf_im()
+      dcu_re => dcu%get_rf_re(); dcu_im => dcu%get_rf_im()
+      amu_re => amu%get_rf_re(); amu_im => amu%get_rf_im()
+    
+      dt = this%dt
+      if (present(dt_)) dt = dt_
+    
+      idt = 1.0 / dt
+      qtmh = 0.5 * this%qbm * dt
+      max_mode = ef%get_max_mode()
+    
+      noff = cu_re(0)%get_noff(1)
+      nrp  = cu_re(0)%get_ndp(1)
+    
+      cu0  => cu_re(0)%get_f1()
+      dcu0 => dcu_re(0)%get_f1()
+      amu0 => amu_re(0)%get_f1()
+    
+      ar_re => af%get_cfr_re()
+      ar_im => af%get_cfr_im()
+      ai_re => af%get_cfi_re()
+      ai_im => af%get_cfi_im()
+    
+      ar_grad_re => af%ar_grad_re
+      ar_grad_im => af%ar_grad_im
+      ai_grad_re => af%ai_grad_re
+      ai_grad_im => af%ai_grad_im
+    
+      do ptrcur = 1, this%npp, p_cache_size
+    
+        ! check if last copy of table and set np
+        if( ptrcur + p_cache_size > this%npp ) then
+          np = this%npp - ptrcur + 1
+        else
+          np = p_cache_size
+        endif
+    
+        ! interpolate fields to particles
+        call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, ix, pcos, psin)
+        call interp_field(ef_re, ef_im, max_mode, weight, ix, pcos, psin, np, ep)
+        call interp_field(bf_re, bf_im, max_mode, weight, ix, pcos, psin, np, bp)
+        call interp_field(ar_re, ar_im, max_mode, weight, ix, pcos, psin, np, apr)
+        call interp_field(ai_re, ai_im, max_mode, weight, ix, pcos, psin, np, api)
+        call interp_field(ar_grad_re, ar_grad_im, max_mode, weight, ix, pcos, psin, np, apr_grad)
+        call interp_field(ai_grad_re, ai_grad_im, max_mode, weight, ix, pcos, psin, np, api_grad)
+    
+        ! advance the particle momentum
+        pp = ptrcur
+        do i = 1, np
+    
+          ! calculate the correction factor for gamma due to ponderomotive force
+          gam_corr = 0.5 * this%qbm**2 * ( apr(i)**2 + api(i)**2 )
+    
+          ! transform momentum from Cartesian to cylindrical coordinates
+          u0(1,i) = this%p(1,pp) * pcos(i) + this%p(2,pp) * psin(i)
+          u0(2,i) = this%p(2,pp) * pcos(i) - this%p(1,pp) * psin(i)
+          u0(3,i) = this%p(3,pp)
+    
+          ! calculate the averaged gamma factor
+          gam = sqrt( 1.0 + u0(1,i)**2 + u0(2,i)**2 + u0(3,i)**2 + gam_corr )
+    
+          ! calculate wake field
+          wp(1,i) = ep(1,i) - bp(2,i)
+          wp(2,i) = ep(2,i) + bp(1,i)
+          wp(3,i) = ep(3,i)
+    
+          ! calculate the effective electric fields
+          ! note that here the time-centered gamma is unknown, so we approximate it using the initial one.
+          tmp = 0.5 * this%qbm / gam
+          ep(1,i) = ep(1,i) - tmp * ( apr(i) * apr_grad(1,i) + api(i) * api_grad(1,i) )
+          ep(2,i) = ep(2,i) - tmp * ( apr(i) * apr_grad(2,i) + api(i) * api_grad(2,i) )
+          ep(3,i) = ep(3,i) + tmp * ( apr(i) * apr_grad(3,i) + api(i) * api_grad(3,i) )
+    
+          ! half electric acceleration
+          qtmh_e = qtmh * gam / ( gam - u0(3,i) )
+          utmp(:) = u0(:,i) + ep(:,i) * qtmh_e
+    
+          ! magnetic rotation
+          gam = sqrt( 1.0 + utmp(1)**2 + utmp(2)**2 + utmp(3)**2 + gam_corr )
+          qtmh_b = qtmh / ( gam - utmp(3) )
+          bp(:,i) = bp(:,i) * qtmh_b
+    
+          u(1,i) = utmp(1) + utmp(2) * bp(3,i) - utmp(3) * bp(2,i)
+          u(2,i) = utmp(2) + utmp(3) * bp(1,i) - utmp(1) * bp(3,i)
+          u(3,i) = utmp(3) + utmp(1) * bp(2,i) - utmp(2) * bp(1,i)
+    
+          ostq = 2.0 / ( 1.0 + bp(1,i)**2 + bp(2,i)**2 + bp(3,i)**2 )
+          bp(:,i) = bp(:,i) * ostq
+    
+          utmp(1) = utmp(1) + u(2,i) * bp(3,i) - u(3,i) * bp(2,i)
+          utmp(2) = utmp(2) + u(3,i) * bp(1,i) - u(1,i) * bp(3,i)
+          utmp(3) = utmp(3) + u(1,i) * bp(2,i) - u(2,i) * bp(1,i)
+    
+          ! half electric acceleration
+          gam = sqrt( 1.0 + utmp(1)**2 + utmp(2)**2 + utmp(3)**2 + gam_corr )
+          qtmh_e = qtmh * gam / ( gam - utmp(3) )
+          u(:,i) = utmp(:) + ep(:,i) * qtmh_e
+    
+          ! calculate and store time-centered values
+          ! deposit momentum flux, acceleration density, and current density
+          du(1) = idt * ( u(1,i) - u0(1,i) )
+          du(2) = idt * ( u(2,i) - u0(2,i) )
+          
+          u(:,i)  = 0.5 * ( u(:,i) + u0(:,i) )
+          ! store the time-centered gamma and (1-(q/m)*psi)
+          this%gamma(pp) = sqrt( 1.0 + u(1,i)**2 + u(2,i)**2 + u(3,i)**2 + gam_corr )
+          ipsi = 1.0 / (this%gamma(pp) - u(3,i))
+          this%psi(pp) = (1.0 - 1.0 / ipsi) / this%qbm
+    
+          dpsi = this%qbm * ( wp(3,i) - ( wp(1,i) * u(1,i) + wp(2,i) * u(2,i) ) * ipsi )
+    
+          du(1) = du(1) + u(1,i) * dpsi * ipsi
+          du(2) = du(2) + u(2,i) * dpsi * ipsi
+    
+          u2(1) = u(1,i) * u(1,i) * ipsi
+          u2(2) = u(1,i) * u(2,i) * ipsi
+          u2(3) = u(2,i) * u(2,i) * ipsi
+    
+          phase0 = cmplx( pcos(i), -psin(i) )
+          phase  = cmplx( 1.0, 0.0 ) * this%q(pp) * ipsi
+    
+          ! deposit m = 0 mode
+          do j = LP, UP
+            w = weight(j,i) * real(phase)
+            cu0( 1:3, ix(i)+j )  = cu0( 1:3, ix(i)+j )  + w * u(1:3,i)
+            dcu0( 1:2, ix(i)+j ) = dcu0( 1:2, ix(i)+j ) + w * du(1:2)
+            amu0( 1:3, ix(i)+j ) = amu0( 1:3, ix(i)+j ) + w * u2(1:3)
+          enddo
+    
+          ! deposit m > 0 mode
+          do mode = 1, max_mode
+    
+            cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+            dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+            amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+            phase = phase * phase0
+    
+            do j = LP, UP
+              w = weight(j,i) * real(phase)
+              cur( 1:3, ix(i)+j )  = cur( 1:3, ix(i)+j )  + w * u(1:3,i)
+              dcur( 1:2, ix(i)+j ) = dcur( 1:2, ix(i)+j ) + w * du(1:2)
+              amur( 1:3, ix(i)+j ) = amur( 1:3, ix(i)+j ) + w * u2(1:3)
+    
+              w = weight(j,i) * aimag(phase)
+              cui( 1:3, ix(i)+j )  = cui( 1:3, ix(i)+j )  + w * u(1:3,i)
+              dcui( 1:2, ix(i)+j ) = dcui( 1:2, ix(i)+j ) + w * du(1:2)
+              amui( 1:3, ix(i)+j ) = amui( 1:3, ix(i)+j ) + w * u2(1:3)
+            enddo
+    
+          enddo
+    
+          pp = pp + 1
+        enddo
+    
+      enddo
+    
+      if ( id_proc_loc() == 0 ) then
+! Correct the used values on axis by adding the value on the guard cell 0
+#ifdef QUADRATIC
+    cu0(3,1) = cu0(3,1) + cu0(3,0)
+#endif
+        ! guard cells on the axis are useless
+        cu0(1:3,0)  = 0.0
+        dcu0(1:2,0) = 0.0
+        amu0(1:3,0) = 0.0
+    
+        cu0(1:2,1)  = 0.0; cu0(3,1) = 8.0 * cu0(3,1)
+        dcu0(1:2,1) = 0.0
+        amu0(1:3,1) = 0.0
+    
+        do mode = 1, max_mode
+    
+          cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+          dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+          amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+          ! guard cells on the axis are useless
+          cur(1:3,0)  = 0.0; cui(1:3,0)  = 0.0
+          dcur(1:2,0) = 0.0; dcui(1:2,0) = 0.0
+          amur(1:3,0) = 0.0; amui(1:3,0) = 0.0
+            
+          if ( mode == 1 ) then
+#ifdef QUADRATIC
+            cur(1:2,1) = cur(1:2,1) + cur(1:2,0)
+            dcur(1:2,1) = dcur(1:2,1) + dcur(1:2,0)
+            cui(1:2,1) = cui(1:2,1) + cui(1:2,0)
+            dcui(1:2,1) = dcui(1:2,1) + dcui(1:2,0)
+#endif
+            cur(1:2,1)  = 8.0 * cur(1:2,1); cur(3,1) = 0.0
+            dcur(1:2,1) = 8.0 * dcur(1:2,1)
+            amur(1:3,1) = 0.0
+            cui(1:2,1)  = 8.0 * cui(1:2,1); cui(3,1) = 0.0
+            dcui(1:2,1) = 8.0 * dcui(1:2,1)
+            amui(1:3,1) = 0.0
+          elseif ( mode == 2 ) then
+#ifdef QUADRATIC
+            amur(1:3,1) = amur(1:3,1) + amur(1:3,0)
+            amui(1:3,1) = amui(1:3,1) + amui(1:3,0)
+#endif
+            cur(1:3,1)  = 0.0
+            dcur(1:2,1) = 0.0
+            amur(1:3,1) = 8.0 * amur(1:3,1)
+            cui(1:3,1)  = 0.0
+            dcui(1:2,1) = 0.0
+            amui(1:3,1) = 8.0 * amui(1:3,1)
+          else
+            cur(1:3,1)  = 0.0
+            dcur(1:2,1) = 0.0
+            amur(1:3,1) = 0.0
+            cui(1:3,1)  = 0.0
+            dcui(1:2,1) = 0.0
+            amui(1:3,1) = 0.0
+            endif
+        enddo
+    
+        do j = 2, nrp + 1
+          ir = 1.0 / ( j + noff - 1 )
+          cu0(1:3,j)  = cu0(1:3,j)  * ir
+          dcu0(1:2,j) = dcu0(1:2,j) * ir
+          amu0(1:3,j) = amu0(1:3,j) * ir
+        enddo
+    
+        do mode = 1, max_mode
+    
+          cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+          dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+          amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+          do j = 2, nrp + 1
+            ir = 1.0 / ( j + noff - 1 )
+            cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
+            dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
+            amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
+          enddo
+        enddo
+    
+      else
+! #ifdef QUADRATIC
+!         if (id_proc_loc() == num_procs_loc() - 1) then
+!         ! Correct the values on the boundary by adding the value on the guard cell nrp+2
+!             cu0(1:3, nrp) = cu0(1:3, nrp) + cu0(1:3, nrp+2)
+!             dcu0(1:2, nrp) = dcu0(1:2, nrp) + dcu0(1:2, nrp+2)
+!             amu0(1:3, nrp) = amu0(1:3, nrp) + amu0(1:3, nrp+2)
+            
+!             do mode = 1, max_mode
+!                 cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+!                 dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+!                 amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+
+!                 cur(1:3, nrp) = cur(1:3, nrp) + cur(1:3, nrp+2);  cui(1:3, nrp) = cui(1:3, nrp) + cui(1:3, nrp+2)
+!                 dcur(1:2, nrp) = dcur(1:2, nrp) + dcur(1:2, nrp+2);  dcui(1:2, nrp) = dcui(1:2, nrp) + dcui(1:2, nrp+2)
+!                 amur(1:3, nrp) = amur(1:3, nrp) + amur(1:3, nrp+2);  amui(1:3, nrp) = amui(1:3, nrp) + amui(1:3, nrp+2)
+!             enddo
+!         endif
+! #endif
+        do j = 0, nrp + 1
+            ir = 1.0 / ( j + noff - 1 )
+            cu0(1:3,j)  = cu0(1:3,j)  * ir
+            dcu0(1:2,j) = dcu0(1:2,j) * ir
+            amu0(1:3,j) = amu0(1:3,j) * ir
+        enddo
+    
+        do mode = 1, max_mode
+          cur  => cu_re(mode)%get_f1();  cui  => cu_im(mode)%get_f1()
+          dcur => dcu_re(mode)%get_f1(); dcui => dcu_im(mode)%get_f1()
+          amur => amu_re(mode)%get_f1(); amui => amu_im(mode)%get_f1()
+    
+          do j = 0, nrp+1
+            ir = 1.0 / ( j + noff - 1 )
+            cur(1:3,j)  = cur(1:3,j)  * ir; cui(1:3,j)  = cui(1:3,j)  * ir
+            dcur(1:2,j) = dcur(1:2,j) * ir; dcui(1:2,j) = dcui(1:2,j) * ir
+            amur(1:3,j) = amur(1:3,j) * ir; amui(1:3,j) = amui(1:3,j) * ir
+          enddo
+        enddo
+    
+      endif
+      
+      call stop_tprof( 'deposit 2D particles' )
+      call write_dbg(cls_name, sname, cls_level, 'ends')
 
-  ! rearrange the particle position
-  this%pbuf(1:this%npp) = this%x( 1, 1:this%npp )
-  do i = 1, this%npp
-    this%x( 1, sort_idx(i) ) = this%pbuf(i)
-  enddo
-  this%pbuf(1:this%npp) = this%x( 2, 1:this%npp )
-  do i = 1, this%npp
-    this%x( 2, sort_idx(i) ) = this%pbuf(i)
-  enddo
+end subroutine AMJ_ROB_PGC
 
-  ! rearrange the particle momentum
-  this%pbuf(1:this%npp) = this%p( 1, 1:this%npp )
-  do i = 1, this%npp
-    this%p( 1, sort_idx(i) ) = this%pbuf(i)
-  enddo
-  this%pbuf(1:this%npp) = this%p( 2, 1:this%npp )
-  do i = 1, this%npp
-    this%p( 2, sort_idx(i) ) = this%pbuf(i)
-  enddo
-  this%pbuf(1:this%npp) = this%p( 3, 1:this%npp )
-  do i = 1, this%npp
-    this%p( 3, sort_idx(i) ) = this%pbuf(i)
-  enddo
 
-  ! rearrange the particle gamma
-  this%pbuf(1:this%npp) = this%gamma( 1:this%npp )
-  do i = 1, this%npp
-    this%gamma( sort_idx(i) ) = this%pbuf(i)
-  enddo
+subroutine PUSH_STD(this, ef, bf, dt_)
 
-  ! rearrange the particle charge
-  this%pbuf(1:this%npp) = this%q( 1:this%npp )
-  do i = 1, this%npp
-    this%q( sort_idx(i) ) = this%pbuf(i)
-  enddo
+    implicit none
+  
+    class(part2d), intent(inout) :: this
+    class(field), intent(in) :: ef, bf
+    real, intent(in), optional :: dt_
+    ! local data
+    character(len=18), save :: sname = 'push_u_std_part2d'
+    type(ufield), dimension(:), pointer :: ef_re, ef_im, bf_re, bf_im
+    integer :: i, np, max_mode, noff
+    real :: qtmh, qtmh1, qtmh2, gam, dtc, ostq, dt
+    real, dimension(:, :), allocatable :: weight
+    real, dimension(p_cache_size) :: pcos, psin
+    integer, dimension(p_cache_size) :: idx
+    real, dimension(p_p_dim, p_cache_size) :: bp, ep, utmp
+    integer(kind=LG) :: ptrcur, pp
+  
+    call write_dbg(cls_name, sname, cls_level, 'starts')
+    call start_tprof( 'push 2D particles' )
+    allocate(weight(LP:UP, p_cache_size))
+  
+    dt = this%dt
+    if (present(dt_)) dt = dt_
+    qtmh = this%qbm * dt * 0.5
+    max_mode = ef%get_max_mode()
+  
+    ef_re => ef%get_rf_re()
+    ef_im => ef%get_rf_im()
+    bf_re => bf%get_rf_re()
+    bf_im => bf%get_rf_im()
+    noff = ef_re(0)%get_noff(1)
+  
+    do ptrcur = 1, this%npp, p_cache_size
+  
+      ! check if last copy of table and set np
+      if( ptrcur + p_cache_size > this%npp ) then
+        np = this%npp - ptrcur + 1
+      else
+        np = p_cache_size
+      endif
+  
+      ! interpolate fields to particles
+      call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, idx, pcos, psin)
+      call interp_field(ef_re, ef_im, max_mode, weight, idx, pcos, psin, np, ep)
+      call interp_field(bf_re, bf_im, max_mode, weight, idx, pcos, psin, np, bp)
+      call transform_to_cartesian(ep, np, pcos, psin)
+      call transform_to_cartesian(bp, np, pcos, psin)
+  
+      pp = ptrcur
+      do i = 1, np
+  
+        ! normalize E and B fields
+        qtmh1 = qtmh / (1.0 - this%qbm * this%psi(pp))
+        qtmh2 = qtmh1 * this%gamma(pp)
+        ep(:, i) = ep(:, i) * qtmh2
+        bp(:, i) = bp(:, i) * qtmh1
+  
+        ! first half of electric field acceleration
+        utmp(:, i) = this%p(:, pp) + ep(:, i)
+  
+        ! rotation about magnetic field
+        this%p(1, pp) = utmp(1, i) + utmp(2, i) * bp(3, i) - utmp(3, i) * bp(2, i)
+        this%p(2, pp) = utmp(2, i) + utmp(3, i) * bp(1, i) - utmp(1, i) * bp(3, i)
+        this%p(3, pp) = utmp(3, i) + utmp(1, i) * bp(2, i) - utmp(2, i) * bp(1, i)
+  
+        ostq = 2.0 / ( 1.0 + bp(1, i)**2 + bp(2, i)**2 + bp(3, i)**2 )
+        bp(1, i) = bp(1, i) * ostq
+        bp(2, i) = bp(2, i) * ostq
+        bp(3, i) = bp(3, i) * ostq
+  
+        utmp(1, i) = utmp(1, i) + this%p(2, pp) * bp(3, i) - this%p(3, pp) * bp(2, i)
+        utmp(2, i) = utmp(2, i) + this%p(3, pp) * bp(1, i) - this%p(1, pp) * bp(3, i)
+        utmp(3, i) = utmp(3, i) + this%p(1, pp) * bp(2, i) - this%p(2, pp) * bp(1, i)
+  
+        ! second half of electric field acc.
+        this%p(:, pp) = utmp(:, i) + ep(:, i)
+  
+        ! calculate time-centered gamma for push of position
+        this%gamma(pp) = sqrt(1.0 + this%p(1, pp)**2 + this%p(2, pp)**2 + this%p(3, pp)**2)
+  
+        pp = pp + 1
+      enddo
+    enddo
+    
+    call stop_tprof( 'push 2D particles' )
+    call write_dbg(cls_name, sname, cls_level, 'ends')
+  
+end subroutine PUSH_STD
 
-  ! rearrange the particle psi
-  this%pbuf(1:this%npp) = this%psi( 1:this%npp )
-  do i = 1, this%npp
-    this%psi( sort_idx(i) ) = this%pbuf(i)
-  enddo
+subroutine PUSH_ROB(this, ef, bf, dt_)
 
-  deallocate( ix )
+    implicit none
+  
+    class(part2d), intent(inout) :: this
+    class(field), intent(in) :: ef, bf
+    real, intent(in), optional :: dt_
+    ! local data
+    character(len=18), save :: sname = 'push_u_robust_part2d'
+    type(ufield), dimension(:), pointer :: ef_re, ef_im, bf_re, bf_im
+  
+    integer :: i, np, max_mode, noff
+    real :: qtmh, qtmh1, qtmh2, gam, dtc, ostq, dt
+    real, dimension(p_p_dim, p_cache_size) :: bp, ep, utmp
+    real, dimension(:, :), allocatable :: weight
+    real, dimension(p_cache_size) :: pcos, psin
+    integer, dimension(p_cache_size) :: idx
+    integer(kind=LG) :: ptrcur, pp
+  
+    call write_dbg(cls_name, sname, cls_level, 'starts')
+    call start_tprof( 'push 2D particles' )
+    allocate(weight(LP:UP, p_cache_size))
+  
+    dt = this%dt
+    if (present(dt_)) dt = dt_
+    qtmh = this%qbm * dt * 0.5
+    max_mode = ef%get_max_mode()
+  
+    ef_re => ef%get_rf_re()
+    ef_im => ef%get_rf_im()
+    bf_re => bf%get_rf_re()
+    bf_im => bf%get_rf_im()
+    noff = ef_re(0)%get_noff(1)
+  
+    do ptrcur = 1, this%npp, p_cache_size
+  
+      ! check if last copy of table and set np
+      if( ptrcur + p_cache_size > this%npp ) then
+        np = this%npp - ptrcur + 1
+      else
+        np = p_cache_size
+      endif
+  
+      ! interpolate fields to particles
+      call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, idx, pcos, psin)
+      call interp_field(ef_re, ef_im, max_mode, weight, idx, pcos, psin, np, ep)
+      call interp_field(bf_re, bf_im, max_mode, weight, idx, pcos, psin, np, bp)
+      call transform_to_cartesian(ep, np, pcos, psin)
+      call transform_to_cartesian(bp, np, pcos, psin)
+  
+      pp = ptrcur
+      do i = 1, np
+        gam = sqrt( 1.0 + this%p(1, pp)**2 + this%p(2, pp)**2 + this%p(3, pp)**2 )
+        qtmh1 = qtmh / (gam - this%p(3, pp))
+        qtmh2 = qtmh1 * gam
+        ep(:, i) = ep(:, i) * qtmh2
+        bp(:, i) = bp(:, i) * qtmh1
+  
+        ! first half of electric field acceleration
+        utmp(:, i) = this%p(:, pp) + ep(:, i)
+  
+        ! rotation about magnetic field
+        this%p(1, pp) = utmp(1, i) + utmp(2, i) * bp(3, i) - utmp(3, i) * bp(2, i)
+        this%p(2, pp) = utmp(2, i) + utmp(3, i) * bp(1, i) - utmp(1, i) * bp(3, i)
+        this%p(3, pp) = utmp(3, i) + utmp(1, i) * bp(2, i) - utmp(2, i) * bp(1, i)
+  
+        ostq = 2.0 / ( 1.0 + bp(1, i)**2 + bp(2, i)**2 + bp(3, i)**2 )
+        bp(:, i) = bp(:, i) * ostq
+  
+        utmp(1, i) = utmp(1, i) + this%p(2, pp) * bp(3, i) - this%p(3, pp) * bp(2, i)
+        utmp(2, i) = utmp(2, i) + this%p(3, pp) * bp(1, i) - this%p(1, pp) * bp(3, i)
+        utmp(3, i) = utmp(3, i) + this%p(1, pp) * bp(2, i) - this%p(2, pp) * bp(1, i)
+  
+        ! second half of electric field acc.
+        this%p(:, pp) = utmp(:, i) + ep(:, i)
+  
+        ! calculate time-centered gamma for push of position
+        this%gamma(pp) = sqrt(1.0 + this%p(1, pp)**2 + this%p(2, pp)**2 + this%p(3, pp)**2)
+  
+        pp = pp + 1
+      enddo
+  
+    enddo
+    
+    call stop_tprof( 'push 2D particles' )
+    call write_dbg(cls_name, sname, cls_level, 'ends')
+  
+end subroutine PUSH_ROB
+  
+subroutine PUSH_ROB_PGC(this, ef, bf, af, dt_)
 
-  call stop_tprof( 'sort 2D particles' )
-  call write_dbg(cls_name, sname, cls_level, 'ends')
+    implicit none
+  
+    class(part2d), intent(inout) :: this
+    class(field), intent(in) :: ef, bf
+    class(field_laser), intent(in) :: af
+    real, intent(in), optional :: dt_
+    ! local data
+    character(len=18), save :: sname = 'push_u_robust_pgc_part2d'
+    type(ufield), dimension(:), pointer :: ef_re, ef_im, bf_re, bf_im
+    type(ufield), dimension(:), pointer :: ar_re, ar_im, ai_re, ai_im
+    type(ufield), dimension(:), pointer :: ar_grad_re, ar_grad_im, ai_grad_re, ai_grad_im
+  
+    integer :: i, np, max_mode, noff
+    real :: qtmh, qtmh_e, qtmh_b, gam, dtc, ostq, gam_corr, tmp, qbm2_hf, dt
+    real, dimension(:, :), allocatable :: weight
+    real, dimension(p_cache_size) :: pcos, psin
+    integer, dimension(p_cache_size) :: idx
+    real, dimension(p_p_dim, p_cache_size) :: bp, ep
+    real, dimension(p_cache_size) :: apr, api
+    real, dimension(3,p_cache_size) :: apr_grad, api_grad
+    real, dimension(p_p_dim) :: utmp
+    integer(kind=LG) :: ptrcur, pp
+  
+    call write_dbg(cls_name, sname, cls_level, 'starts')
+    call start_tprof( 'push 2D particles' )
+    allocate(weight(LP:UP, p_cache_size))
+  
+    dt = this%dt
+    if (present(dt_)) dt = dt_
+  
+    qtmh = this%qbm * dt * 0.5
+    qbm2_hf = this%qbm**2 * 0.5
+    max_mode = ef%get_max_mode()
+  
+    ef_re => ef%get_rf_re()
+    ef_im => ef%get_rf_im()
+    bf_re => bf%get_rf_re()
+    bf_im => bf%get_rf_im()
+  
+    ar_re => af%get_cfr_re()
+    ar_im => af%get_cfr_im()
+    ai_re => af%get_cfi_re()
+    ai_im => af%get_cfi_im()
+  
+    ar_grad_re => af%ar_grad_re
+    ar_grad_im => af%ar_grad_im
+    ai_grad_re => af%ai_grad_re
+    ai_grad_im => af%ai_grad_im
+  
+    noff = ef_re(0)%get_noff(1)
+    do ptrcur = 1, this%npp, p_cache_size
+  
+      ! check if last copy of table and set np
+      if( ptrcur + p_cache_size > this%npp ) then
+        np = this%npp - ptrcur + 1
+      else
+        np = p_cache_size
+      endif
+  
+      ! interpolate fields to particles
+      call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, idx, pcos, psin)
+      call interp_field(ef_re, ef_im, max_mode, weight, idx, pcos, psin, np, ep)
+      call interp_field(bf_re, bf_im, max_mode, weight, idx, pcos, psin, np, bp)
+      call interp_field(ar_re, ar_im, max_mode, weight, idx, pcos, psin, np, apr)
+      call interp_field(ai_re, ai_im, max_mode, weight, idx, pcos, psin, np, api)
+      call interp_field(ar_grad_re, ar_grad_im, max_mode, weight, idx, pcos, psin, np, apr_grad)
+      call interp_field(ai_grad_re, ai_grad_im, max_mode, weight, idx, pcos, psin, np, api_grad)
+      call transform_to_cartesian(ep, np, pcos, psin)
+      call transform_to_cartesian(bp, np, pcos, psin)
+      call transform_to_cartesian(apr_grad, np, pcos, psin)
+      call transform_to_cartesian(api_grad, np, pcos, psin)
+  
+      ! note that the gamma and momenta calculated in this subroutine are the ones after averaging.
+      pp = ptrcur
+      do i = 1, np
+  
+        ! calculate the correction factor for gamma due to ponderomotive force
+        gam_corr = qbm2_hf * ( apr(i)**2 + api(i)**2 )
+  
+        ! calculate the effective electric fields
+        ! note that this%gamma is already time-centered
+        tmp = 0.5 * this%qbm / this%gamma(pp)
+        ep(1,i) = ep(1,i) - tmp * ( apr(i) * apr_grad(1,i) + api(i) * api_grad(1,i) )
+        ep(2,i) = ep(2,i) - tmp * ( apr(i) * apr_grad(2,i) + api(i) * api_grad(2,i) )
+        ep(3,i) = ep(3,i) + tmp * ( apr(i) * apr_grad(3,i) + api(i) * api_grad(3,i) )
+  
+        ! half acceleration due to effective electric field
+        ! note that this%psi (1 - (q/m)*psi) is already time-centered
+        qtmh_b = qtmh / (1.0 - this%qbm * this%psi(pp))
+        qtmh_e = qtmh_b * this%gamma(pp)
+        ep(:,i) = ep(:,i) * qtmh_e
+        utmp(:) = this%p(:,pp) + ep(:,i)
+  
+        ! rotation about magnetic field
+        bp(:,i) = bp(:,i) * qtmh_b
+        this%p(1,pp) = utmp(1) + utmp(2) * bp(3,i) - utmp(3) * bp(2,i)
+        this%p(2,pp) = utmp(2) + utmp(3) * bp(1,i) - utmp(1) * bp(3,i)
+        this%p(3,pp) = utmp(3) + utmp(1) * bp(2,i) - utmp(2) * bp(1,i)
+  
+        ostq = 2.0 / ( 1.0 + bp(1,i)**2 + bp(2,i)**2 + bp(3,i)**2 )
+        bp(1,i) = bp(1,i) * ostq
+        bp(2,i) = bp(2,i) * ostq
+        bp(3,i) = bp(3,i) * ostq
+  
+        utmp(1) = utmp(1) + this%p(2,pp) * bp(3,i) - this%p(3,pp) * bp(2,i)
+        utmp(2) = utmp(2) + this%p(3,pp) * bp(1,i) - this%p(1,pp) * bp(3,i)
+        utmp(3) = utmp(3) + this%p(1,pp) * bp(2,i) - this%p(2,pp) * bp(1,i)
+  
+        ! half acceleration due to effective electric field
+        this%p(:,pp) = utmp(:) + ep(:,i)
+  
+        ! calculate time-centered gamma for push of position
+        tmp = apr_grad(3, i) * dt; gam_corr = gam_corr + qbm2_hf * (apr(i) + 0.25 * tmp) * tmp
+        tmp = api_grad(3, i) * dt; gam_corr = gam_corr + qbm2_hf * (api(i) + 0.25 * tmp) * tmp
+        this%gamma(pp) = sqrt(1.0 + this%p(1, pp)**2 + this%p(2, pp)**2 + this%p(3, pp)**2 + gam_corr)
+  
+        pp = pp + 1
+      enddo
+  
+    enddo
     
-end subroutine sort_part2d
+    call stop_tprof( 'push 2D particles' )
+    call write_dbg(cls_name, sname, cls_level, 'ends')
+  
+  end subroutine PUSH_ROB_PGC
 
-function get_deposit_ax_corr( ppc_r ) result(res)
-  implicit none
-  integer, intent(in) :: ppc_r
-  real :: res
-  res = (12.0 * ppc_r**2) / ( 1.0 + 2.0 * ppc_r**2 ) 
-end function get_deposit_ax_corr
+  subroutine PUSH_STD_PGC(this, ef, bf, af, dt_)
 
-end module part2d_class
\ No newline at end of file
+    implicit none
+  
+    class(part2d), intent(inout) :: this
+    class(field), intent(in) :: ef, bf
+    class(field_laser), intent(in) :: af
+    real, intent(in), optional :: dt_
+    ! local data
+    character(len=18), save :: sname = 'push_u_std_pgc_part2d'
+    type(ufield), dimension(:), pointer :: ef_re, ef_im, bf_re, bf_im
+    type(ufield), dimension(:), pointer :: ar_re, ar_im, ai_re, ai_im
+    type(ufield), dimension(:), pointer :: ar_grad_re, ar_grad_im, ai_grad_re, ai_grad_im
+    integer :: i, np, max_mode, noff
+    real :: qtmh, qtmh_e, qtmh_b, gam, dtc, ostq, gam_corr, tmp, dt, qbm2_hf
+    real, dimension(:, :), allocatable :: weight
+    real, dimension(p_cache_size) :: pcos, psin
+    integer, dimension(p_cache_size) :: idx
+    real, dimension(p_p_dim, p_cache_size) :: bp, ep
+    real, dimension(p_cache_size) :: apr, api
+    real, dimension(3,p_cache_size) :: apr_grad, api_grad
+    real, dimension(p_p_dim) :: utmp
+    integer(kind=LG) :: ptrcur, pp
+  
+    call write_dbg(cls_name, sname, cls_level, 'starts')
+    call start_tprof( 'push 2D particles' )
+    allocate(weight(LP:UP, p_cache_size))
+  
+    dt = this%dt
+    if (present(dt_)) dt = dt_
+  
+    qtmh = this%qbm * this%dt * 0.5
+    qbm2_hf = this%qbm**2 * 0.5
+    max_mode = ef%get_max_mode()
+  
+    ef_re => ef%get_rf_re()
+    ef_im => ef%get_rf_im()
+    bf_re => bf%get_rf_re()
+    bf_im => bf%get_rf_im()
+  
+    ar_re => af%get_cfr_re()
+    ar_im => af%get_cfr_im()
+    ai_re => af%get_cfi_re()
+    ai_im => af%get_cfi_im()
+  
+    ar_grad_re => af%ar_grad_re
+    ar_grad_im => af%ar_grad_im
+    ai_grad_re => af%ai_grad_re
+    ai_grad_im => af%ai_grad_im
+  
+    noff = ef_re(0)%get_noff(1)
+    do ptrcur = 1, this%npp, p_cache_size
+  
+      ! check if last copy of table and set np
+      if( ptrcur + p_cache_size > this%npp ) then
+        np = this%npp - ptrcur + 1
+      else
+        np = p_cache_size
+      endif
+  
+      ! interpolate fields to particles
+      call gen_interp_info(this%x, this%dr, noff, np, ptrcur, weight, idx, pcos, psin)
+      call interp_field(ef_re, ef_im, max_mode, weight, idx, pcos, psin, np, ep)
+      call interp_field(bf_re, bf_im, max_mode, weight, idx, pcos, psin, np, bp)
+      call interp_field(ar_re, ar_im, max_mode, weight, idx, pcos, psin, np, apr)
+      call interp_field(ai_re, ai_im, max_mode, weight, idx, pcos, psin, np, api)
+      call interp_field(ar_grad_re, ar_grad_im, max_mode, weight, idx, pcos, psin, np, apr_grad)
+      call interp_field(ai_grad_re, ai_grad_im, max_mode, weight, idx, pcos, psin, np, api_grad)
+      call transform_to_cartesian(ep, np, pcos, psin)
+      call transform_to_cartesian(bp, np, pcos, psin)
+      call transform_to_cartesian(apr_grad, np, pcos, psin)
+      call transform_to_cartesian(api_grad, np, pcos, psin)
+  
+      ! note that the gamma and momenta calculated in this subroutine are the ones after averaging.
+      pp = ptrcur
+      do i = 1, np
+  
+        ! calculate the correction factor for gamma due to ponderomotive force
+        gam_corr = qbm2_hf * (apr(i)**2 + api(i)**2)
+  
+        ! calculate the effective electric fields
+        ! note that this%gamma is already time-centered
+        tmp = 0.5 * this%qbm / this%gamma(pp)
+        ep(1, i) = ep(1, i) - tmp * ( apr(i) * apr_grad(1, i) + api(i) * api_grad(1, i) )
+        ep(2, i) = ep(2, i) - tmp * ( apr(i) * apr_grad(2, i) + api(i) * api_grad(2, i) )
+        ep(3, i) = ep(3, i) + tmp * ( apr(i) * apr_grad(3, i) + api(i) * api_grad(3, i) )
+  
+        ! normalize the fields
+        qtmh_b = qtmh / (1.0 - this%qbm * this%psi(pp))
+        qtmh_e = qtmh_b * this%gamma(pp)
+        ep(:, i) = ep(:, i) * qtmh_e
+        bp(:, i) = bp(:, i) * qtmh_b
+  
+        ! half acceleration due to effective electric field
+        utmp(:) = this%p(:, pp) + ep(:, i)
+  
+        ! rotation about magnetic field
+        this%p(1, pp) = utmp(1) + utmp(2) * bp(3, i) - utmp(3) * bp(2, i)
+        this%p(2, pp) = utmp(2) + utmp(3) * bp(1, i) - utmp(1) * bp(3, i)
+        this%p(3, pp) = utmp(3) + utmp(1) * bp(2, i) - utmp(2) * bp(1, i)
+  
+        ostq = 2.0 / (1.0 + bp(1, i)**2 + bp(2, i)**2 + bp(3, i)**2)
+        bp(1, i) = bp(1, i) * ostq
+        bp(2, i) = bp(2, i) * ostq
+        bp(3, i) = bp(3, i) * ostq
+  
+        utmp(1) = utmp(1) + this%p(2, pp) * bp(3, i) - this%p(3, pp) * bp(2, i)
+        utmp(2) = utmp(2) + this%p(3, pp) * bp(1, i) - this%p(1, pp) * bp(3, i)
+        utmp(3) = utmp(3) + this%p(1, pp) * bp(2, i) - this%p(2, pp) * bp(1, i)
+  
+        ! half acceleration due to effective electric field
+        this%p(:, pp) = utmp(:) + ep(:, i)
+  
+        ! calculate time-centered gamma for push of position
+        tmp = apr_grad(3, i) * dt; gam_corr = gam_corr + qbm2_hf * (apr(i) + 0.25 * tmp) * tmp
+        tmp = api_grad(3, i) * dt; gam_corr = gam_corr + qbm2_hf * (api(i) + 0.25 * tmp) * tmp
+        this%gamma(pp) = sqrt(1.0 + this%p(1, pp)**2 + this%p(2, pp)**2 + this%p(3, pp)**2 + gam_corr)
+  
+        pp = pp + 1
+      enddo
+  
+    enddo
+    
+    call stop_tprof( 'push 2D particles' )
+    call write_dbg(cls_name, sname, cls_level, 'ends')
+  
+end subroutine PUSH_STD_PGC
+  
+#undef SPLINE
+#undef CHIDEPOSIT
+#undef QDEPOSIT
+! #undef AX_COOR
+#undef LINEAR
+#undef QUADRATIC
+! #undef CORRNUM
+#undef LP
+#undef UP
+#undef AMJ_STD
+#undef AMJ_ROB
+#undef AMJ_STD_PGC
+#undef AMJ_ROB_PGC
+#undef PUSH_STD
+#undef PUSH_ROB
+#undef PUSH_ROB_PGC
+#undef PUSH_STD_PGC
+
+#endif
\ No newline at end of file
diff --git a/source/species/species2d_class.f03 b/source/species/species2d_class.f03
index 852a5ef..696dfbb 100644
--- a/source/species/species2d_class.f03
+++ b/source/species/species2d_class.f03
@@ -30,6 +30,7 @@ module species2d_class
    class(field_jay), allocatable :: cu, amu
    class(field_djdxi), allocatable :: dcu
    class(fdist2d), pointer :: pf => null()
+   integer :: part_shape
    integer :: push_type
 
    contains
@@ -94,6 +95,7 @@ subroutine init_species2d( this, opts, pf, part_shape, max_mode, qbm, s, &
    this%pf        => pf
    dt             = opts%get_dxi()
    this%push_type = push_type
+   this%part_shape = part_shape
    ntd            = pf%neutralized
 
    allocate( this%q, this%cu, this%amu, this%dcu )
@@ -101,11 +103,11 @@ subroutine init_species2d( this, opts, pf, part_shape, max_mode, qbm, s, &
    smth_ord = 0
    if (present(smooth_order)) smth_ord = smooth_order
 
-   call this%q%new(opts,max_mode,part_shape,smth_ord)
-   call this%cu%new(opts,max_mode,part_shape,smth_ord)
-   call this%dcu%new(opts,max_mode,part_shape,smth_ord)
-   call this%amu%new(opts,max_mode,part_shape,smth_ord)
-   call this%part%new(opts,pf,qbm,dt,s)
+   call this%q%new(opts,max_mode,this%part_shape,smth_ord)
+   call this%cu%new(opts,max_mode,this%part_shape,smth_ord)
+   call this%dcu%new(opts,max_mode,this%part_shape,smth_ord)
+   call this%amu%new(opts,max_mode,this%part_shape,smth_ord)
+   call this%part%new(opts,pf,qbm,dt,s,this%part_shape)
 
    this%q  = 0.0
    this%cu = 0.0