#include<math.h>#include<stdio.h>#include<stdlib.h>#include<fftw3.h>#ifdef USE_OPENMP#include<omp.h>#endif#include"acceleration.h"#include"error.h"#include"system.h"#include"math_functions.h"IN_FILEvoidcloud_in_cell(double*restrictdelta,constdouble*restrictx,constdouble*restrictm,constintnum_particles,constintpm_grid_size,constdouble*restrictbox_center,constdoublebox_length){constintgrid_size_2=pm_grid_size*pm_grid_size;constintgrid_size_3=pm_grid_size*pm_grid_size*pm_grid_size;/* Clear the grid */for(inti=0;i<grid_size_3;i++){delta[i]=0.0;}constdoublebox_width=box_length/2.0;constdoublecell_length=box_length/pm_grid_size;constdoubleinv_cell_length=1.0/cell_length;#ifdef USE_OPENMP#pragma omp parallel for#endiffor(inti=0;i<num_particles;i++){constdoublex_normalized=(x[i*3+0]-box_center[0]+box_width)*inv_cell_length;constdoubley_normalized=(x[i*3+1]-box_center[1]+box_width)*inv_cell_length;constdoublez_normalized=(x[i*3+2]-box_center[2]+box_width)*inv_cell_length;constintn_x=(int)x_normalized;constintn_y=(int)y_normalized;constintn_z=(int)z_normalized;constdoubleweight_x=x_normalized-n_x;constdoubleweight_y=y_normalized-n_y;constdoubleweight_z=z_normalized-n_z;constdoubleweight_x_m=1.0-weight_x;constdoubleweight_y_m=1.0-weight_y;constdoubleweight_z_m=1.0-weight_z;delta[(n_x%pm_grid_size)*grid_size_2+(n_y%pm_grid_size)*pm_grid_size+(n_z%pm_grid_size)]+=m[i]*weight_x_m*weight_y_m*weight_z_m;delta[((n_x+1)%pm_grid_size)*grid_size_2+(n_y%pm_grid_size)*pm_grid_size+(n_z%pm_grid_size)]+=m[i]*weight_x*weight_y_m*weight_z_m;delta[(n_x%pm_grid_size)*grid_size_2+((n_y+1)%pm_grid_size)*pm_grid_size+(n_z%pm_grid_size)]+=m[i]*weight_x_m*weight_y*weight_z_m;delta[(n_x%pm_grid_size)*grid_size_2+(n_y%pm_grid_size)*pm_grid_size+((n_z+1)%pm_grid_size)]+=m[i]*weight_x_m*weight_y_m*weight_z;delta[((n_x+1)%pm_grid_size)*grid_size_2+((n_y+1)%pm_grid_size)*pm_grid_size+(n_z%pm_grid_size)]+=m[i]*weight_x*weight_y*weight_z_m;delta[((n_x+1)%pm_grid_size)*grid_size_2+(n_y%pm_grid_size)*pm_grid_size+((n_z+1)%pm_grid_size)]+=m[i]*weight_x*weight_y_m*weight_z;delta[(n_x%pm_grid_size)*grid_size_2+((n_y+1)%pm_grid_size)*pm_grid_size+((n_z+1)%pm_grid_size)]+=m[i]*weight_x_m*weight_y*weight_z;delta[((n_x+1)%pm_grid_size)*grid_size_2+((n_y+1)%pm_grid_size)*pm_grid_size+((n_z+1)%pm_grid_size)]+=m[i]*weight_x*weight_y*weight_z;}constdoublecell_volume=cell_length*cell_length*cell_length;for(inti=0;i<grid_size_3;i++){delta[i]/=cell_volume;}}IN_FILEvoidget_cloud_in_cell_acceleration(double*restricta,constdouble*restrictx,constdouble*restrictacc_grid,constintnum_particles,constintpm_grid_size,constdouble*restrictbox_center,constdoublebox_length){constdoublebox_width=box_length/2.0;constdoublecell_length=box_length/pm_grid_size;constdoubleinv_cell_length=1.0/cell_length;constintgrid_size_2=pm_grid_size*pm_grid_size;#ifdef USE_OPENMP#pragma omp parallel for#endiffor(inti=0;i<num_particles;i++){constdoublex_normalized=(x[i*3+0]-box_center[0]+box_width)*inv_cell_length;constdoubley_normalized=(x[i*3+1]-box_center[1]+box_width)*inv_cell_length;constdoublez_normalized=(x[i*3+2]-box_center[2]+box_width)*inv_cell_length;constintn_x=(int)x_normalized;constintn_y=(int)y_normalized;constintn_z=(int)z_normalized;constdoubleweight_x=x_normalized-n_x;constdoubleweight_y=y_normalized-n_y;constdoubleweight_z=z_normalized-n_z;constdoubleweight_x_m=1.0-weight_x;constdoubleweight_y_m=1.0-weight_y;constdoubleweight_z_m=1.0-weight_z;for(intj=0;j<3;j++){a[i*3+j]=(acc_grid[((n_x%pm_grid_size)*grid_size_2+(n_y%pm_grid_size)*pm_grid_size+(n_z%pm_grid_size))*3+j]*weight_x_m*weight_y_m*weight_z_m+acc_grid[(((n_x+1)%pm_grid_size)*grid_size_2+(n_y%pm_grid_size)*pm_grid_size+(n_z%pm_grid_size))*3+j]*weight_x*weight_y_m*weight_z_m+acc_grid[((n_x%pm_grid_size)*grid_size_2+((n_y+1)%pm_grid_size)*pm_grid_size+(n_z%pm_grid_size))*3+j]*weight_x_m*weight_y*weight_z_m+acc_grid[((n_x%pm_grid_size)*grid_size_2+(n_y%pm_grid_size)*pm_grid_size+((n_z+1)%pm_grid_size))*3+j]*weight_x_m*weight_y_m*weight_z+acc_grid[(((n_x+1)%pm_grid_size)*grid_size_2+((n_y+1)%pm_grid_size)*pm_grid_size+(n_z%pm_grid_size))*3+j]*weight_x*weight_y*weight_z_m+acc_grid[(((n_x+1)%pm_grid_size)*grid_size_2+(n_y%pm_grid_size)*pm_grid_size+((n_z+1)%pm_grid_size))*3+j]*weight_x*weight_y_m*weight_z+acc_grid[((n_x%pm_grid_size)*grid_size_2+((n_y+1)%pm_grid_size)*pm_grid_size+((n_z+1)%pm_grid_size))*3+j]*weight_x_m*weight_y*weight_z+acc_grid[(((n_x+1)%pm_grid_size)*grid_size_2+((n_y+1)%pm_grid_size)*pm_grid_size+((n_z+1)%pm_grid_size))*3+j]*weight_x*weight_y*weight_z);}}}IN_FILEvoidcompute_acceleration_with_gradient(double*restrictacc_grid,constdouble*restrictphi,constintpm_grid_size,constdoublebox_length){constdoublecell_length=box_length/pm_grid_size;constintgrid_size_2=pm_grid_size*pm_grid_size;#ifdef USE_OPENMP#pragma omp parallel for#endiffor(inti=0;i<pm_grid_size;i++){constinti_m_2=(i-2+pm_grid_size)%pm_grid_size;constinti_m=(i-1+pm_grid_size)%pm_grid_size;constinti_p=(i+1)%pm_grid_size;constinti_p_2=(i+2)%pm_grid_size;for(intj=0;j<pm_grid_size;j++){constintj_m_2=(j-2+pm_grid_size)%pm_grid_size;constintj_m=(j-1+pm_grid_size)%pm_grid_size;constintj_p=(j+1)%pm_grid_size;constintj_p_2=(j+2)%pm_grid_size;for(intk=0;k<pm_grid_size;k++){constintk_m_2=(k-2+pm_grid_size)%pm_grid_size;constintk_m=(k-1+pm_grid_size)%pm_grid_size;constintk_p=(k+1)%pm_grid_size;constintk_p_2=(k+2)%pm_grid_size;constintindex=i*grid_size_2+j*pm_grid_size+k;constintindex_x_m_2=i_m_2*grid_size_2+j*pm_grid_size+k;constintindex_x_m=i_m*grid_size_2+j*pm_grid_size+k;constintindex_x_p=i_p*grid_size_2+j*pm_grid_size+k;constintindex_x_p_2=i_p_2*grid_size_2+j*pm_grid_size+k;constintindex_y_m_2=i*grid_size_2+j_m_2*pm_grid_size+k;constintindex_y_m=i*grid_size_2+j_m*pm_grid_size+k;constintindex_y_p=i*grid_size_2+j_p*pm_grid_size+k;constintindex_y_p_2=i*grid_size_2+j_p_2*pm_grid_size+k;constintindex_z_m_2=i*grid_size_2+j*pm_grid_size+k_m_2;constintindex_z_m=i*grid_size_2+j*pm_grid_size+k_m;constintindex_z_p=i*grid_size_2+j*pm_grid_size+k_p;constintindex_z_p_2=i*grid_size_2+j*pm_grid_size+k_p_2;acc_grid[index*3+0]=(phi[index_x_m_2]-8.0*phi[index_x_m]+8.0*phi[index_x_p]-phi[index_x_p_2])/(12.0*cell_length);acc_grid[index*3+1]=(phi[index_y_m_2]-8.0*phi[index_y_m]+8.0*phi[index_y_p]-phi[index_y_p_2])/(12.0*cell_length);acc_grid[index*3+2]=(phi[index_z_m_2]-8.0*phi[index_z_m]+8.0*phi[index_z_p]-phi[index_z_p_2])/(12.0*cell_length);}}}}WIN32DLL_APIErrorStatusacceleration_PM(double*restricta,constCosmologicalSystem*restrictsystem,constdoubleG,constintpm_grid_size){ErrorStatuserror_status;/* Declare variables */constintnum_particles=system->num_particles;constdouble*restrictx=system->x;constdouble*restrictm=system->m;constdouble*restrictbox_center=system->box_center;constdoublebox_width=system->box_width;constdoublebox_length=box_width*2.0;constdoublescale_factor=system->scale_factor;constintgrid_size_2=pm_grid_size*pm_grid_size;constintgrid_size_3=pm_grid_size*pm_grid_size*pm_grid_size;double*restrictacc_grid=malloc(grid_size_3*3*sizeof(double));double*restrictdelta=fftw_malloc(grid_size_3*sizeof(double));fftw_complex*delta_fourier=fftw_malloc(grid_size_2*(pm_grid_size/2+1)*sizeof(fftw_complex));double*restrictphi=fftw_malloc(grid_size_3*sizeof(double));if(!acc_grid||!delta||!delta_fourier||!phi){error_status=WRAP_RAISE_ERROR(GRAV_MEMORY_ERROR,"Failed to allocate memory for particle mesh acceleration");gotoerr_malloc;}/* Deposit mass onto the grid */cloud_in_cell(delta,x,m,num_particles,pm_grid_size,box_center,box_length);/* Compute the density perturbation in Fourier space */fftw_planplan_forward=fftw_plan_dft_r2c_3d(pm_grid_size,pm_grid_size,pm_grid_size,delta,delta_fourier,FFTW_ESTIMATE);if(!plan_forward){error_status=WRAP_RAISE_ERROR(GRAV_MEMORY_ERROR,"Failed to create FFTW plan for forward transform");gotoerr_fftw_plan_forward;}fftw_execute(plan_forward);constinthalf_grid_size=pm_grid_size/2+1;constdoubletwo_pi_over_box_length_squared=4.0*M_PI*M_PI/(box_length*box_length);#ifdef USE_OPENMP#pragma omp parallel for#endiffor(inti=0;i<pm_grid_size;i++){constintk_x=(i<half_grid_size)?i:i-pm_grid_size;for(intj=0;j<pm_grid_size;j++){constintk_y=(j<half_grid_size)?j:j-pm_grid_size;for(intk=0;k<half_grid_size;k++){constintk_z=k;constdoublek_sq=(k_x*k_x+k_y*k_y+k_z*k_z)*two_pi_over_box_length_squared;constdoublekernel=k_sq==0.0?0.0:-1.0/k_sq;constintindex=i*(pm_grid_size*half_grid_size)+j*half_grid_size+k;delta_fourier[index][0]*=kernel;delta_fourier[index][1]*=kernel;}}}/* Compute the potential in real space */fftw_planplan_backward=fftw_plan_dft_c2r_3d(pm_grid_size,pm_grid_size,pm_grid_size,delta_fourier,phi,FFTW_ESTIMATE);if(!plan_backward){error_status=WRAP_RAISE_ERROR(GRAV_MEMORY_ERROR,"Failed to create FFTW plan for backward transform");gotoerr_fftw_plan_backward;}fftw_execute(plan_backward);constdoublefactor=4.0*M_PI*G/(scale_factor*(double)grid_size_3);for(inti=0;i<grid_size_3;i++){phi[i]*=factor;}/* Compute the force by taking the gradient of the potential */compute_acceleration_with_gradient(acc_grid,phi,pm_grid_size,box_length);/* Add the acceleration to the particles */get_cloud_in_cell_acceleration(a,x,acc_grid,num_particles,pm_grid_size,box_center,box_length);/* Free memory */free(acc_grid);fftw_free(delta);fftw_free(delta_fourier);fftw_free(phi);fftw_destroy_plan(plan_forward);fftw_destroy_plan(plan_backward);returnmake_success_error_status();err_fftw_plan_backward:fftw_destroy_plan(plan_backward);err_fftw_plan_forward:fftw_destroy_plan(plan_forward);err_malloc:free(acc_grid);fftw_free(delta);fftw_free(delta_fourier);fftw_free(phi);returnerror_status;}
For cosmological simulations, we are usually interested in a large domain with
periodic boundary conditions. Interestingly, grid-based algorithm
like the Particle Mesh method naturally provides periodic boundary conditions
by solving the Poisson's equation in the Fourier Space,
\[\begin{equation}
\label{eqn:poisson}
\nabla^2 \Phi(x) = 4 \pi G \rho(\textbf{x})
\implies - \textbf{k}^2 \Phi(\textbf{k}) = 4 \pi G \rho(\textbf{k}).
\end{equation}\]
The overall time complexity is \(\mathcal{O}(N + N_{\textnormal{cell} } \log N_{\textnormal{cell} })\),
with \(\mathcal{O}(N)\) for interpolation and \(\mathcal{O}(N_{\textnormal{cell} } \log N_{\textnormal{cell} })\) for FFT.
Therefore, this method is very fast. However, it is not accurate for short range force where
the length scale is smaller than the grid resolution. Thus, it is not suitable for inhomogeneous
distributions with high density regions.
In this section, we would focus on the details in implementing the Particle Mesh algorithm.
There are three steps in the algorithm:
Estimating the density of the underlying grid,
Solving the Poisson's equation in fourier space, and
Computing the acceleration by taking a gradient of the potential in real space
and interpolating the acceleration to the particle.
Density estimation
In a sense, N-body simulation is a Monte Carlo technique for large scale simulations.
Each particle is a sampling drawn from a distribution at \(t = 0\), where all \(N\)
particles together models the underlying continuous distribution1.
Therefore, when estimating the density of the underlying grid, we should not think of the particles as a
localized point with extreme high density (e.g. an enormous star), but a cloud of mass that
is represented as a particle due to limited computational capability.
In figure 1, we shows the comparison between the Nearest Grid Point (NGP)
scheme versus the Cloud-In-Cell (CIC) scheme. The latter provides a much
smoother estimation of density. In 3D, a particle at \(\textbf{x}_p = (x_p, y_p, z_p)\) could contribute density to its
parent cell \(\textbf{x}_c = (x_c, y_c, z_c)\) and seven neighboring cells. Define the weightings
where \(\rho_p = m_p / V_{\textnormal{unit cell}}\).
Figure 1: Two different schemes in estimating density in a discretized particle mesh.
Solving the Poisson's equation
Now, we have a grid with assigned density value on each grid point. To transform it into
Fourier Space, we simply do a discrete fourier transform using FFT libraries
(e.g. FFTW in C or numpy.fft in Python).
Then, we compute the wave numbers by
where \(\textbf{L} = (L_x, L_y, L_z)\) is the box length, and
\(\textbf{n} = (n_x, n_y, n_z) \in [0, N)\)
is the grid indices (one may need to wrap the indices above \(\frac{N}{2}\) to \(-\frac{N}{2}\) so that
\((n_x, n_y, n_z) \in [- \frac{N}{2}, \frac{N}{2})\)).
Then, we simply compute \(\textbf{k}^2\) and obtain the potential in Fourier Space
Now, we do an inverse discrete fourier transform to obtain the potential on the grid points.
Then, the acceleration can be obtained by computing \(\textbf{a} = - \nabla \Phi\) by a central
finite difference scheme with fourth order accuracy: