powerit/simple_8hpp_source.html

#ifndef POWERIT_SIMPLE_HPP

#define POWERIT_SIMPLE_HPP


#include "core.hpp"

#include <numeric>


#ifndef POWERIT_CUSTOM_PARALLEL

#include "subpar/subpar.hpp"

#endif


namespace powerit {


/**
 * Hand a range-based job to whichever parallelization backend is configured.
 *
 * When the `POWERIT_CUSTOM_PARALLEL` macro is defined, it is invoked with the three arguments as-is.
 * Otherwise, the call is forwarded to `subpar::parallelize_range()`.
 *
 * @tparam Task_ Type of the task count.
 * @tparam Run_ Type of the callable that processes a range of tasks.
 *
 * @param num_workers Number of workers, passed straight to the backend.
 * @param num_tasks Number of tasks, passed straight to the backend.
 * @param run_task_range Callable passed to the backend.
 * The callers in this file supply lambdas taking `(int, size_t start, size_t length)`.
 */
template<typename Task_, class Run_>
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range) {
#ifdef POWERIT_CUSTOM_PARALLEL
    POWERIT_CUSTOM_PARALLEL(num_workers, num_tasks, run_task_range);
#else
    // nothrow_ = true is acceptable because the callables passed in by this file are simple:
    // they contain no explicit throws and no allocations that could throw bad_alloc, just math.
    subpar::parallelize_range<true>(num_workers, num_tasks, std::move(run_task_range));
#endif
}


/**
 * Run power iterations after initializing the working vector from a user-supplied engine.
 *
 * This overload calls `fill_starting_vector()` on `vector` and then defers to the
 * `compute()` overload that does not take an engine.
 *
 * @tparam Data_ Numeric type of the matrix and vector entries.
 * @tparam Engine_ Type of the engine handed to `fill_starting_vector()`;
 * presumably a random number generator (confirm against `core.hpp`).
 *
 * @param order Dimension of the square matrix, also passed as the length of `vector`.
 * @param matrix Pointer to the matrix values, forwarded unchanged.
 * @param row_major Storage layout flag, forwarded unchanged.
 * @param vector Working array; overwritten by `fill_starting_vector()` before iterating.
 * @param engine Engine used to populate `vector`.
 * @param opt Options, forwarded unchanged.
 *
 * @return The result of the engine-free `compute()` overload.
 */
template<typename Data_, class Engine_>
Result<Data_> compute(size_t order, const Data_* matrix, bool row_major, Data_* vector, Engine_& engine, const Options& opt) {
    // Step 1: populate the starting vector.
    fill_starting_vector(order, vector, engine);

    // Step 2: iterate. This call must stay unqualified: the engine-free overload is declared
    // further down the file, so it is only found by argument-dependent lookup (through
    // 'Options') when the template is instantiated.
    return compute(order, matrix, row_major, vector, opt);
}


/**
 * Run power iterations on a dense square matrix held in a contiguous array.
 *
 * This wraps `compute_core()` with a callback that writes the product of the matrix
 * with the current vector into a buffer. One of three strategies is used:
 *
 * - Row-major storage: each output entry is the inner product of one contiguous row
 *   with the vector; rows are distributed across workers via `parallelize()`.
 * - Column-major storage with at most one thread: a serial accumulation over columns.
 * - Column-major storage with multiple threads: each worker accumulates its slice of the
 *   output in its own scratch buffer and then copies it into the shared output.
 *
 * @tparam Data_ Numeric type of the matrix and vector entries.
 *
 * @param order Dimension of the square matrix; `matrix` is indexed as `order * order` values.
 * @param matrix Pointer to the matrix values.
 * @param row_major Whether `matrix` is stored row-major (`true`) or column-major (`false`).
 * @param vector Working array passed to `compute_core()`; presumably holds the starting
 * vector on input and the final vector on output (confirm against `core.hpp`).
 * @param opt Options. Only `num_threads` is read here; the object is otherwise forwarded to `compute_core()`.
 * Non-positive values of `num_threads` are handled as a single thread in the column-major case.
 *
 * @return The result of `compute_core()`.
 */
template<typename Data_>
Result<Data_> compute(size_t order, const Data_* matrix, bool row_major, Data_* vector, const Options& opt) {
    if (row_major) {
        // Workers write to disjoint entries of 'buffer', so no scratch space is needed.
        return compute_core(order, [&](std::vector<Data_>& buffer, const Data_* vec) {
            parallelize(opt.num_threads, order, [&](int, size_t start, size_t length) {
                for (size_t j = start, end = start + length; j < end; ++j) {
                    // Note that j and order are already both 'size_t', so no need to cast to avoid overflow.
                    buffer[j] = std::inner_product(vec, vec + order, matrix + j * order, static_cast<Data_>(0.0));
                }
            });
        }, vector, opt);
    }

    if (opt.num_threads <= 1) {
        // Dedicated serial path to avoid allocating a per-thread temporary.
        // Zero or negative thread counts are also routed here: otherwise the per-thread
        // scratch space below would be empty (or its allocation would fail) while the
        // default backend still invokes the task with worker index 0.
        return compute_core(order, [&](std::vector<Data_>& buffer, const Data_* vec) {
            std::fill(buffer.begin(), buffer.end(), 0);
            auto matcopy = matrix;
            for (size_t j = 0; j < order; ++j) {
                Data_ mult = vec[j];
                // 'matcopy' walks the whole matrix once, one column per outer iteration.
                for (size_t k = 0; k < order; ++k, ++matcopy) {
                    buffer[k] += mult * (*matcopy);
                }
            }
        }, vector, opt);
    }

    // We make a separate buffer for each thread to avoid false sharing problems.
    // We do the allocation outside so that (i) we can re-use memory across iterations,
    // and (ii) the code inside the parallelize() cannot throw.
    // 'num_threads' is at least 2 at this point, so the conversion to an unsigned size is safe.
    std::vector<std::vector<Data_> > temp_buffers(static_cast<size_t>(opt.num_threads), std::vector<Data_>(order));

    return compute_core(order, [&](std::vector<Data_>& buffer, const Data_* vec) {
        parallelize(opt.num_threads, order, [&](int t, size_t start, size_t length) {
            // Only the first 'length' entries of the scratch buffer are used for this slice.
            auto& tmp = temp_buffers[t];
            std::fill_n(tmp.begin(), length, 0);

            // 'offset' points at entry 'start' of column j; stepping by 'order' moves to the next column.
            size_t offset = start; // already size_t's, no need to cast.
            for (size_t j = 0; j < order; ++j, offset += order) {
                auto mult = vec[j];
                auto matcopy = matrix + offset;
                for (size_t k = 0; k < length; ++k, ++matcopy) {
                    tmp[k] += mult * (*matcopy);
                }
            }

            // Publish this worker's slice into the shared output.
            std::copy_n(tmp.begin(), length, buffer.begin() + start);
        });
    }, vector, opt);
}


}


#endif

core.hpp
Core data structures and calculations.

powerit
Namespace for power iterations.
Definition core.hpp:15

powerit::compute
Result< Data_ > compute(size_t order, const Data_ *matrix, bool row_major, Data_ *vector, Engine_ &engine, const Options &opt)
Definition simple.hpp:60

powerit::parallelize
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range)
Definition simple.hpp:32

powerit::compute_core
Result< Data_ > compute_core(size_t order, Multiply_ multiply, Data_ *vector, const Options &opt)
Definition core.hpp:131

powerit::fill_starting_vector
void fill_starting_vector(size_t order, Data_ *vector, Engine_ &engine)
Definition core.hpp:93

subpar::parallelize_range
void parallelize_range(int num_workers, Task_ num_tasks, Run_ run_task_range)

powerit::Options
Options for compute().
Definition core.hpp:20

powerit::Options::num_threads
int num_threads
Definition core.hpp:40

powerit::Result
Result of compute().
Definition core.hpp:70

subpar.hpp