Particle-Mesh algorithm

Source code (Click to expand)

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <fftw3.h>

#ifdef USE_OPENMP
#include <omp.h>
#endif

#include "acceleration.h"
#include "error.h"
#include "system.h"
#include "math_functions.h"

IN_FILE void cloud_in_cell(
    double *restrict delta,
    const double *restrict x,
    const double *restrict m,
    const int num_particles,
    const int pm_grid_size,
    const double *restrict box_center,
    const double box_length
)
{
    const int grid_size_2 = pm_grid_size * pm_grid_size;
    const int grid_size_3 = pm_grid_size * pm_grid_size * pm_grid_size; 

    /* Clear the grid */
    for (int i = 0; i < grid_size_3; i++)
    {
        delta[i] = 0.0;
    }

    const double box_width = box_length / 2.0;
    const double cell_length = box_length / pm_grid_size;
    const double inv_cell_length = 1.0 / cell_length;

#ifdef USE_OPENMP
    #pragma omp parallel for
#endif
    for (int i = 0; i < num_particles; i++)
    {
        const double x_normalized = (x[i * 3 + 0] - box_center[0] + box_width) * inv_cell_length;
        const double y_normalized = (x[i * 3 + 1] - box_center[1] + box_width) * inv_cell_length;
        const double z_normalized = (x[i * 3 + 2] - box_center[2] + box_width) * inv_cell_length;

        const int n_x = (int) x_normalized;
        const int n_y = (int) y_normalized;
        const int n_z = (int) z_normalized;

        const double weight_x = x_normalized - n_x;
        const double weight_y = y_normalized - n_y;
        const double weight_z = z_normalized - n_z;

        const double weight_x_m = 1.0 - weight_x;
        const double weight_y_m = 1.0 - weight_y;
        const double weight_z_m = 1.0 - weight_z;

        delta[
            (n_x       % pm_grid_size) * grid_size_2 +
            (n_y       % pm_grid_size) * pm_grid_size +
            (n_z       % pm_grid_size)
        ] += m[i] * weight_x_m * weight_y_m * weight_z_m;
        delta[
            ((n_x + 1) % pm_grid_size) * grid_size_2 +
            (n_y       % pm_grid_size) * pm_grid_size +
            (n_z       % pm_grid_size)
        ] += m[i] * weight_x * weight_y_m * weight_z_m;
        delta[
            (n_x       % pm_grid_size) * grid_size_2 +
            ((n_y + 1) % pm_grid_size) * pm_grid_size +
            (n_z       % pm_grid_size)
        ] += m[i] * weight_x_m * weight_y * weight_z_m;
        delta[
            (n_x       % pm_grid_size) * grid_size_2 +
            (n_y       % pm_grid_size) * pm_grid_size +
            ((n_z + 1) % pm_grid_size)
        ] += m[i] * weight_x_m * weight_y_m * weight_z;
        delta[
            ((n_x + 1) % pm_grid_size) * grid_size_2 +
            ((n_y + 1) % pm_grid_size) * pm_grid_size +
            (n_z       % pm_grid_size)
        ] += m[i] * weight_x * weight_y * weight_z_m;
        delta[
            ((n_x + 1) % pm_grid_size) * grid_size_2 +
            (n_y       % pm_grid_size) * pm_grid_size +
            ((n_z + 1) % pm_grid_size)
        ] += m[i] * weight_x * weight_y_m * weight_z;
        delta[
            (n_x       % pm_grid_size) * grid_size_2 +
            ((n_y + 1) % pm_grid_size) * pm_grid_size +
            ((n_z + 1) % pm_grid_size)
        ] += m[i] * weight_x_m * weight_y * weight_z;
        delta[
            ((n_x + 1) % pm_grid_size) * grid_size_2 +
            ((n_y + 1) % pm_grid_size) * pm_grid_size +
            ((n_z + 1) % pm_grid_size)
        ] += m[i] * weight_x * weight_y * weight_z;
    }

    const double cell_volume = cell_length * cell_length * cell_length;
    for (int i = 0; i < grid_size_3; i++)
    {
        delta[i] /= cell_volume;
    }
}

IN_FILE void get_cloud_in_cell_acceleration(
    double *restrict a,
    const double *restrict x,
    const double *restrict acc_grid,
    const int num_particles,
    const int pm_grid_size,
    const double *restrict box_center,
    const double box_length
)
{
    const double box_width = box_length / 2.0;
    const double cell_length = box_length / pm_grid_size;
    const double inv_cell_length = 1.0 / cell_length;
    const int grid_size_2 = pm_grid_size * pm_grid_size;

#ifdef USE_OPENMP
    #pragma omp parallel for
#endif
    for (int i = 0; i < num_particles; i++)
    {
        const double x_normalized = (x[i * 3 + 0] - box_center[0] + box_width) * inv_cell_length;
        const double y_normalized = (x[i * 3 + 1] - box_center[1] + box_width) * inv_cell_length;
        const double z_normalized = (x[i * 3 + 2] - box_center[2] + box_width) * inv_cell_length;

        const int n_x = (int) x_normalized;
        const int n_y = (int) y_normalized;
        const int n_z = (int) z_normalized;

        const double weight_x = x_normalized - n_x;
        const double weight_y = y_normalized - n_y;
        const double weight_z = z_normalized - n_z;

        const double weight_x_m = 1.0 - weight_x;
        const double weight_y_m = 1.0 - weight_y;
        const double weight_z_m = 1.0 - weight_z;

        for (int j = 0; j < 3; j++)
        {
            a[i * 3 + j] = (
                acc_grid[
                    (
                        (n_x       % pm_grid_size) * grid_size_2 +
                        (n_y       % pm_grid_size) * pm_grid_size +
                        (n_z       % pm_grid_size)
                    ) * 3 + j
                ] * weight_x_m * weight_y_m * weight_z_m
                + acc_grid[
                    (
                        ((n_x + 1) % pm_grid_size) * grid_size_2 +
                        (n_y       % pm_grid_size) * pm_grid_size +
                        (n_z       % pm_grid_size)
                    ) * 3 + j
                ] * weight_x * weight_y_m * weight_z_m
                + acc_grid[
                    (
                        (n_x       % pm_grid_size) * grid_size_2 +
                        ((n_y + 1) % pm_grid_size) * pm_grid_size +
                        (n_z       % pm_grid_size)
                    ) * 3 + j
                ] * weight_x_m * weight_y * weight_z_m
                + acc_grid[
                    (
                        (n_x       % pm_grid_size) * grid_size_2 +
                        (n_y       % pm_grid_size) * pm_grid_size +
                        ((n_z + 1) % pm_grid_size)
                    ) * 3 + j
                ] * weight_x_m * weight_y_m * weight_z
                + acc_grid[
                    (
                        ((n_x + 1) % pm_grid_size) * grid_size_2 +
                        ((n_y + 1) % pm_grid_size) * pm_grid_size +
                        (n_z       % pm_grid_size)
                    ) * 3 + j
                ] * weight_x * weight_y * weight_z_m
                + acc_grid[
                    (
                        ((n_x + 1) % pm_grid_size) * grid_size_2 +
                        (n_y       % pm_grid_size) * pm_grid_size +
                        ((n_z + 1) % pm_grid_size)
                    ) * 3 + j
                ] * weight_x * weight_y_m * weight_z
                + acc_grid[
                    (
                        (n_x       % pm_grid_size) * grid_size_2 +
                        ((n_y + 1) % pm_grid_size) * pm_grid_size +
                        ((n_z + 1) % pm_grid_size)
                    ) * 3 + j
                ] * weight_x_m * weight_y * weight_z
                + acc_grid[
                    (
                        ((n_x + 1) % pm_grid_size) * grid_size_2 +
                        ((n_y + 1) % pm_grid_size) * pm_grid_size +
                        ((n_z + 1) % pm_grid_size)
                    ) * 3 + j
                ] * weight_x * weight_y * weight_z
            );
        }
    }
}

IN_FILE void compute_acceleration_with_gradient(
    double *restrict acc_grid,
    const double *restrict phi,
    const int pm_grid_size,
    const double box_length
)
{
    const double cell_length = box_length / pm_grid_size;
    const int grid_size_2 = pm_grid_size * pm_grid_size;
#ifdef USE_OPENMP
    #pragma omp parallel for
#endif
    for (int i = 0; i < pm_grid_size; i++)
    {
        const int i_m_2 = (i - 2 + pm_grid_size) % pm_grid_size;
        const int i_m = (i - 1 + pm_grid_size) % pm_grid_size;
        const int i_p = (i + 1) % pm_grid_size;
        const int i_p_2 = (i + 2) % pm_grid_size;
        for (int j = 0; j < pm_grid_size; j++)
        {
            const int j_m_2 = (j - 2 + pm_grid_size) % pm_grid_size;
            const int j_m = (j - 1 + pm_grid_size) % pm_grid_size;
            const int j_p = (j + 1) % pm_grid_size;
            const int j_p_2 = (j + 2) % pm_grid_size;
            for (int k = 0; k < pm_grid_size; k++)
            {
                const int k_m_2 = (k - 2 + pm_grid_size) % pm_grid_size;
                const int k_m = (k - 1 + pm_grid_size) % pm_grid_size;
                const int k_p = (k + 1) % pm_grid_size;
                const int k_p_2 = (k + 2) % pm_grid_size;

                const int index     = i * grid_size_2 + j * pm_grid_size + k;

                const int index_x_m_2 = i_m_2 * grid_size_2 + j * pm_grid_size + k;
                const int index_x_m   = i_m * grid_size_2 + j * pm_grid_size + k;
                const int index_x_p   = i_p * grid_size_2 + j * pm_grid_size + k;
                const int index_x_p_2 = i_p_2 * grid_size_2 + j * pm_grid_size + k;

                const int index_y_m_2 = i * grid_size_2 + j_m_2 * pm_grid_size + k;
                const int index_y_m   = i * grid_size_2 + j_m * pm_grid_size + k;
                const int index_y_p   = i * grid_size_2 + j_p * pm_grid_size + k;
                const int index_y_p_2 = i * grid_size_2 + j_p_2 * pm_grid_size + k;

                const int index_z_m_2 = i * grid_size_2 + j * pm_grid_size + k_m_2;
                const int index_z_m   = i * grid_size_2 + j * pm_grid_size + k_m;
                const int index_z_p   = i * grid_size_2 + j * pm_grid_size + k_p;
                const int index_z_p_2 = i * grid_size_2 + j * pm_grid_size + k_p_2;

                acc_grid[index * 3 + 0] = (
                    phi[index_x_m_2] - 8.0 * phi[index_x_m] + 8.0 * phi[index_x_p] - phi[index_x_p_2]
                ) / (12.0 * cell_length);
                acc_grid[index * 3 + 1] = (
                    phi[index_y_m_2] - 8.0 * phi[index_y_m] + 8.0 * phi[index_y_p] - phi[index_y_p_2]
                ) / (12.0 * cell_length);
                acc_grid[index * 3 + 2] = (
                    phi[index_z_m_2] - 8.0 * phi[index_z_m] + 8.0 * phi[index_z_p] - phi[index_z_p_2]
                ) / (12.0 * cell_length);
            }
        }
    }
}

WIN32DLL_API ErrorStatus acceleration_PM(
    double *restrict a,
    const CosmologicalSystem *restrict system,
    const double G,
    const int pm_grid_size
)
{
    ErrorStatus error_status;

    /* Declare variables */
    const int num_particles = system->num_particles;
    const double *restrict x = system->x;
    const double *restrict m = system->m;
    const double *restrict box_center = system->box_center;
    const double box_width = system->box_width;
    const double box_length = box_width * 2.0;
    const double scale_factor = system->scale_factor;

    const int grid_size_2 = pm_grid_size * pm_grid_size;
    const int grid_size_3 = pm_grid_size * pm_grid_size * pm_grid_size;

    double *restrict acc_grid = malloc(grid_size_3 * 3 * sizeof(double));
    double *restrict delta = fftw_malloc(grid_size_3 * sizeof(double));
    fftw_complex *delta_fourier = fftw_malloc(grid_size_2 * (pm_grid_size / 2 + 1) * sizeof(fftw_complex));
    double *restrict phi = fftw_malloc(grid_size_3 * sizeof(double));
    if (!acc_grid || !delta || !delta_fourier || !phi)
    {
        error_status = WRAP_RAISE_ERROR(GRAV_MEMORY_ERROR, "Failed to allocate memory for particle mesh acceleration");
        goto err_malloc;
    }

    /* Deposit mass onto the grid */
    cloud_in_cell(delta, x, m, num_particles, pm_grid_size, box_center, box_length);

    /* Compute the density perturbation in Fourier space */
    fftw_plan plan_forward = fftw_plan_dft_r2c_3d(pm_grid_size, pm_grid_size, pm_grid_size, delta, delta_fourier, FFTW_ESTIMATE);
    if (!plan_forward)
    {
        error_status = WRAP_RAISE_ERROR(GRAV_MEMORY_ERROR, "Failed to create FFTW plan for forward transform");
        goto err_fftw_plan_forward;
    }
    fftw_execute(plan_forward);

    const int half_grid_size = pm_grid_size / 2 + 1;
    const double two_pi_over_box_length_squared = 4.0 * M_PI * M_PI / (box_length * box_length);
#ifdef USE_OPENMP
    #pragma omp parallel for
#endif
    for (int i = 0; i < pm_grid_size; i++)
    {
        const int k_x = (i < half_grid_size) ? i : i - pm_grid_size;
        for (int j = 0; j < pm_grid_size; j++)
        {
            const int k_y = (j < half_grid_size) ? j : j - pm_grid_size;
            for (int k = 0; k < half_grid_size; k++)
            {
                const int k_z = k;
                const double k_sq = (k_x * k_x + k_y * k_y + k_z * k_z) * two_pi_over_box_length_squared;

                const double kernel = k_sq == 0.0 ? 0.0 : -1.0 / k_sq;

                const int index = i * (pm_grid_size * half_grid_size) + j * half_grid_size + k;
                delta_fourier[index][0] *= kernel;
                delta_fourier[index][1] *= kernel;
            }
        }
    }

    /* Compute the potential in real space */
    fftw_plan plan_backward = fftw_plan_dft_c2r_3d(pm_grid_size, pm_grid_size, pm_grid_size, delta_fourier, phi, FFTW_ESTIMATE);
    if (!plan_backward)
    {
        error_status = WRAP_RAISE_ERROR(GRAV_MEMORY_ERROR, "Failed to create FFTW plan for backward transform");
        goto err_fftw_plan_backward;
    }
    fftw_execute(plan_backward);

    const double factor = 4.0 * M_PI * G / (scale_factor * (double) grid_size_3);
    for (int i = 0; i < grid_size_3; i++)
    {
        phi[i] *= factor;
    }

    /* Compute the force by taking the gradient of the potential */
    compute_acceleration_with_gradient(
        acc_grid,
        phi, pm_grid_size, box_length
    );

    /* Add the acceleration to the particles */
    get_cloud_in_cell_acceleration(
        a, x, acc_grid,
        num_particles, pm_grid_size, box_center, box_length
    );

    /* Free memory */
    free(acc_grid);
    fftw_free(delta);
    fftw_free(delta_fourier);
    fftw_free(phi);
    fftw_destroy_plan(plan_forward);
    fftw_destroy_plan(plan_backward);

    return make_success_error_status();

err_fftw_plan_backward:
    fftw_destroy_plan(plan_backward);
err_fftw_plan_forward:
    fftw_destroy_plan(plan_forward);
err_malloc:
    free(acc_grid);
    fftw_free(delta);
    fftw_free(delta_fourier);
    fftw_free(phi);

    return error_status;
}

For cosmological simulations, we are usually interested in a large domain with periodic boundary conditions. Interestingly, grid-based algorithm like the Particle Mesh method naturally provides periodic boundary conditions by solving the Poisson's equation in the Fourier Space,

\[\begin{equation} \label{eqn:poisson} \nabla^2 \Phi(x) = 4 \pi G \rho(\textbf{x}) \implies - \textbf{k}^2 \Phi(\textbf{k}) = 4 \pi G \rho(\textbf{k}). \end{equation}\]

The overall time complexity is \(\mathcal{O}(N + N_{\textnormal{cell} } \log N_{\textnormal{cell} })\), with \(\mathcal{O}(N)\) for interpolation and \(\mathcal{O}(N_{\textnormal{cell} } \log N_{\textnormal{cell} })\) for FFT. Therefore, this method is very fast. However, it is not accurate for short range force where the length scale is smaller than the grid resolution. Thus, it is not suitable for inhomogeneous distributions with high density regions.

In this section, we would focus on the details in implementing the Particle Mesh algorithm. There are three steps in the algorithm:

Estimating the density of the underlying grid,
Solving the Poisson's equation in fourier space, and
Computing the acceleration by taking a gradient of the potential in real space and interpolating the acceleration to the particle.

Density estimation

In a sense, N-body simulation is a Monte Carlo technique for large scale simulations. Each particle is a sampling drawn from a distribution at \(t = 0\), where all \(N\) particles together models the underlying continuous distribution¹. Therefore, when estimating the density of the underlying grid, we should not think of the particles as a localized point with extreme high density (e.g. an enormous star), but a cloud of mass that is represented as a particle due to limited computational capability.

In figure 1, we shows the comparison between the Nearest Grid Point (NGP) scheme versus the Cloud-In-Cell (CIC) scheme. The latter provides a much smoother estimation of density. In 3D, a particle at \(\textbf{x}_p = (x_p, y_p, z_p)\) could contribute density to its parent cell \(\textbf{x}_c = (x_c, y_c, z_c)\) and seven neighboring cells. Define the weightings

\[\begin{equation} %\label{} \textbf{d} = \frac{\textbf{x}_p - \textbf{x}_c}{l}, \quad \textbf{t} = 1 - \textbf{d}, \end{equation}\]

where \(l\) is the cell length. We have the density contribution due to a particle²

\[\begin{align} \rho_{i, j, k} &= \rho_p t_x t_y t_z, &\rho_{i + 1, j + 1, k} &= \rho_p d_x d_y t_z, \\ \rho_{i + 1, j, k} &= \rho_p d_x t_y t_z, &\rho_{i + 1, j, k + 1} &= \rho_p d_x t_y d_z, \nonumber \\ \rho_{i, j + 1, k} &= \rho_p t_x d_y t_z, &\rho_{i, j + 1, k + 1} &= \rho_p t_x d_y d_z, \nonumber \\ \rho_{i, j, k + 1} &= \rho_p t_x t_y d_z, &\rho_{i + 1, j + 1, k + 1} &= \rho_p d_x d_y d_z, \nonumber \end{align}\]

where \(\rho_p = m_p / V_{\textnormal{unit cell}}\).

Cloud-in-cell — Figure 1: Two different schemes in estimating density in a discretized particle mesh.

Solving the Poisson's equation

Now, we have a grid with assigned density value on each grid point. To transform it into Fourier Space, we simply do a discrete fourier transform using FFT libraries (e.g. FFTW in C or numpy.fft in Python). Then, we compute the wave numbers by

\[\begin{equation} %\label{} \textbf{k} = \left( \frac{2 \pi n_x}{L_x}, \frac{2 \pi n_y}{L_y}, \frac{2 \pi n_z}{L_z} \right), \end{equation}\]

where \(\textbf{L} = (L_x, L_y, L_z)\) is the box length, and \(\textbf{n} = (n_x, n_y, n_z) \in [0, N)\) is the grid indices (one may need to wrap the indices above \(\frac{N}{2}\) to \(-\frac{N}{2}\) so that \((n_x, n_y, n_z) \in [- \frac{N}{2}, \frac{N}{2})\)). Then, we simply compute \(\textbf{k}^2\) and obtain the potential in Fourier Space

\[\begin{equation} %\label{} \Phi(\textbf{k}) = -\frac{4 \pi G}{\textbf{k}^2} \rho(\textbf{k}). \end{equation}\]

Interpolating the acceleration

Now, we do an inverse discrete fourier transform to obtain the potential on the grid points. Then, the acceleration can be obtained by computing \(\textbf{a} = - \nabla \Phi\) by a central finite difference scheme with fourth order accuracy:

\[\begin{equation} %\label{} f'(x) \approx \frac{1}{l^3} \left[ \frac{1}{12} f(x - 2l) - \frac{2}{3} f(x - l) + \frac{2}{3} f(x + l) - \frac{1}{12} f(x + 2l) \right] + \mathcal{O}(l^4), \end{equation}\]

Then, we perform a linear interpolation to obtain the acceleration of the particles. Reusing the notations from the cloud-in-cell scheme, we have

\[\begin{align} %\label{eqn:eqlabel} \textbf{a}_p &= \textbf{a}_{i, j, k} t_x t_y t_z + \textbf{a}_{i + 1, j, k} d_x t_y t_z + \textbf{a}_{i, j + 1, k} t_x d_y t_z + \textbf{a}_{i, j, k + 1} t_x t_y d_z \nonumber \\ &\quad\,+ \textbf{a}_{i + 1, j + 1, k} d_x d_y t_z + \textbf{a}_{i + 1, j, k + 1} d_x t_y d_z + \textbf{a}_{i, j + 1, k + 1} t_x d_y d_z + \textbf{a}_{i + 1, j + 1, k + 1} d_x d_y d_z. \end{align}\]

W. Dehnen and J. I. Read. N-body simulations of gravitational dynamics. The European Physical Journal Plus, 126(5):55, May 2011. URL: http://link.springer.com/10.1140/epjp/i2011-11055-3 (visited on 2025-03-31), doi:10.1140/epjp/i2011-11055-3. ↩
Andrey Kravtsov. Writing a pm code. \url https://astro.uchicago.edu/ andrey/Talks/PM/pm.pdf, March 2002. Accessed: 2025-04-13. ↩

Particle-Mesh algorithm

Density estimation

Solving the Poisson's equation

Interpolating the acceleration

Comments