Integral Approximation 2

/**
 * @file
 * @brief [Monte Carlo
 * Integration](https://en.wikipedia.org/wiki/Monte_Carlo_integration)
 *
 * @details
 * In mathematics, Monte Carlo integration is a technique for numerical
 * integration using random numbers. It is a particular Monte Carlo method that
 * numerically computes a definite integral. While other algorithms usually
 * evaluate the integrand at a regular grid, Monte Carlo randomly chooses points
 * at which the integrand is evaluated. This method is particularly useful for
 * higher-dimensional integrals.
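 * The integral \int_a^b f(x) dx is approximated by
 * \frac{1}{N} \sum_{i=1}^{N} f(x_i) / p(x_i), where the samples x_i are drawn
 * from a probability density p that is positive on the integration domain.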
 *
 * This implementation supports arbitrary pdfs.
 * These pdfs are sampled using the [Metropolis-Hastings
 * algorithm](https://en.wikipedia.org/wiki/Metropolis–Hastings_algorithm). This
 * can be swapped out for any other sampling technique, for example the inverse
 * transform method. Metropolis-Hastings was chosen because it is the most
 * general and can also be extended to higher-dimensional sampling spaces.
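 *
 * With a symmetric proposal (a normal perturbation is used here), the
 * Hastings correction cancels and a proposed state x' is accepted with
 * probability min(p(x') / p(x), 1), which is what generate_samples implements.
 *
 * A minimal usage sketch (the uniform pdf and the integrand below are purely
 * illustrative; see test() for the examples actually run by this file):
 * \code
 * math::monte_carlo::Function f = [](double& x) { return x * x; };
 * math::monte_carlo::Function pdf = [](double& x) {
 *     return (x >= 0.0 && x <= 1.0) ? 1.0 : 0.0;
 * };
 * // approximates \int_0^1 x^2 dx = 1/3
 * double result = math::monte_carlo::integral_monte_carlo(0.5, f, pdf);
 * \endcode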
 *
 * @author [Domenic Zingsheim](https://github.com/DerAndereDomenic)
 */

#define _USE_MATH_DEFINES  /// for M_PI on Windows (must come before <cmath>)
#include <cmath>           /// for math functions
#include <cstdint>         /// for fixed size data types
#include <ctime>           /// for time to initialize rng
#include <functional>      /// for function pointers
#include <iostream>        /// for std::cout
#include <random>          /// for random number generation
#include <vector>          /// for std::vector

/**
 * @namespace math
 * @brief Math algorithms
 */
namespace math {
/**
 * @namespace monte_carlo
 * @brief Functions for the [Monte Carlo
 * Integration](https://en.wikipedia.org/wiki/Monte_Carlo_integration)
 * implementation
 */
namespace monte_carlo {

using Function = std::function<double(
    double&)>;  /// shorthand for the std::function type used in this implementation

/**
 * @brief Generate samples according to some pdf
 * @details This function uses Metropolis-Hastings to generate random numbers.
 * It generates a sequence of random numbers by using a Markov chain. Therefore,
 * we need to define a start_point and the number of samples we want to
 * generate. Because the first samples generated by the Markov chain may not be
 * distributed according to the given pdf, one can specify how many samples
 * should be discarded before storing samples.
 * @param start_point The starting point of the Markov chain
 * @param pdf The pdf to sample
 * @param num_samples The number of samples to generate
 * @param discard How many samples should be discarded at the start
 * @returns A vector of size num_samples with samples distributed according to
 * the pdf
 */
std::vector<double> generate_samples(const double& start_point,
                                     const Function& pdf,
                                     const uint32_t& num_samples,
                                     const uint32_t& discard = 100000) {
    std::vector<double> samples;
    samples.reserve(num_samples);

    double x_t = start_point;

    std::default_random_engine generator;
    std::uniform_real_distribution<double> uniform(0.0, 1.0);
    std::normal_distribution<double> normal(0.0, 1.0);
    generator.seed(static_cast<uint32_t>(std::time(nullptr)));

    for (uint32_t t = 0; t < num_samples + discard; ++t) {
        // Generate a new proposal according to some mutation strategy.
        // This is arbitrary and can be swapped.
        double x_dash = normal(generator) + x_t;
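        // Metropolis acceptance ratio: the normal proposal is symmetric, so
        // the Hastings term cancels and only the ratio of pdf values remains.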
        double acceptance_probability = std::min(pdf(x_dash) / pdf(x_t), 1.0);
        double u = uniform(generator);

        // Accept "new state" according to the acceptance_probability
        if (u <= acceptance_probability) {
            x_t = x_dash;
        }

        if (t >= discard) {
            samples.push_back(x_t);
        }
    }

    return samples;
}

/**
 * @brief Compute an approximation of an integral using Monte Carlo integration
 * @details The integration domain [a,b] is given by the pdf.
 * The pdf has to fulfill the following conditions:
 * 1) for all x \in [a,b] : p(x) > 0
 * 2) for all x \not\in [a,b] : p(x) = 0
 * 3) \int_a^b p(x) dx = 1
 * @param start_point The start point of the Markov chain (see generate_samples)
 * @param function The function to integrate
 * @param pdf The pdf to sample
 * @param num_samples The number of samples used to approximate the integral
 * @returns The approximation of the integral according to
 * \frac{1}{N} \sum_{i=1}^{N} f(x_i) / p(x_i)
 */
double integral_monte_carlo(const double& start_point, const Function& function,
                            const Function& pdf,
                            const uint32_t& num_samples = 1000000) {
    double integral = 0.0;
    std::vector<double> samples =
        generate_samples(start_point, pdf, num_samples);

    for (double sample : samples) {
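        // Importance-sampling estimator: each sample contributes the weight
        // f(x_i) / p(x_i); the average of these weights approximates the
        // integral.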
        integral += function(sample) / pdf(sample);
    }

    return integral / static_cast<double>(samples.size());
}

}  // namespace monte_carlo
}  // namespace math

/**
 * @brief Self-test implementations
 * @returns void
 */
static void test() {
    std::cout << "Disclaimer: Because this is a randomized algorithm,"
              << std::endl;
    std::cout
        << "it may happen that singular samples deviate from the true result."
        << std::endl
        << std::endl;
    ;

    math::monte_carlo::Function f;
    math::monte_carlo::Function pdf;
    double integral = 0;
    double lower_bound = 0, upper_bound = 0;

    /* \int_{-2}^{2} -x^2 + 4 dx */
    f = [&](double& x) { return -x * x + 4.0; };

    lower_bound = -2.0;
    upper_bound = 2.0;
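    // Piecewise-constant pdf on [-2, 2]: 0.1 on [-2, -1] and [1, 2], 0.4 on
    // (-1, 1); the total probability mass is 0.1 + 0.8 + 0.1 = 1.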
    pdf = [&](double& x) {
        if (x >= lower_bound && x <= -1.0) {
            return 0.1;
        }
        if (x <= upper_bound && x >= 1.0) {
            return 0.1;
        }
        if (x > -1.0 && x < 1.0) {
            return 0.4;
        }
        return 0.0;
    };

    integral = math::monte_carlo::integral_monte_carlo(
        (upper_bound - lower_bound) / 2.0, f, pdf);

    std::cout << "This number should be close to 10.666666: " << integral
              << std::endl;

    /* \int_{0}^{1} e^x dx */
    f = [&](double& x) { return std::exp(x); };

    lower_bound = 0.0;
    upper_bound = 1.0;
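    // Piecewise-constant pdf on [0, 1]: 0.1 on [0, 0.2], 0.4 on (0.2, 0.4],
    // 1.5 on (0.4, 1); the total probability mass is 0.02 + 0.08 + 0.9 = 1.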
    pdf = [&](double& x) {
        if (x >= lower_bound && x <= 0.2) {
            return 0.1;
        }
        if (x > 0.2 && x <= 0.4) {
            return 0.4;
        }
        if (x > 0.4 && x < upper_bound) {
            return 1.5;
        }
        return 0.0;
    };

    integral = math::monte_carlo::integral_monte_carlo(
        (upper_bound - lower_bound) / 2.0, f, pdf);

    std::cout << "This number should be close to 1.7182818: " << integral
              << std::endl;

    /* \int_{-\infty}^{\infty} sinc(x) dx, sinc(x) = sin(pi * x) / (pi * x)
       This is a difficult integral because of its infinite domain.
       Therefore, the estimate may deviate noticeably from the expected result.
    */
    f = [&](double& x) {
        // sinc has a removable singularity at x = 0, where its value is 1.
        return x == 0.0 ? 1.0 : std::sin(M_PI * x) / (M_PI * x);
    };

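    // The standard normal density serves as the pdf; it is strictly positive
    // on the whole real line, so it covers the infinite integration domain.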
    pdf = [&](double& x) {
        return 1.0 / std::sqrt(2.0 * M_PI) * std::exp(-x * x / 2.0);
    };

    integral = math::monte_carlo::integral_monte_carlo(0.0, f, pdf, 10000000);

    std::cout << "This number should be close to 1.0: " << integral
              << std::endl;
}

/**
 * @brief Main function
 * @returns 0 on exit
 */
int main() {
    test();  // run self-test implementations
    return 0;
}