powerit
C++ implementation for power iterations
Loading...
Searching...
No Matches
simple.hpp
Go to the documentation of this file.
1#ifndef POWERIT_SIMPLE_HPP
2#define POWERIT_SIMPLE_HPP
3
4#include "core.hpp"
5#include <numeric>
6
13#ifndef POWERIT_CUSTOM_PARALLEL
14#include "subpar/subpar.hpp"
15#endif
16
17namespace powerit {
18
31template<typename Task_, class Run_>
32void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range) {
33#ifndef POWERIT_CUSTOM_PARALLEL
34 // We can set nothrow_ = true because of the simplicity of the code below;
35 // no explicit throws, no allocations that could throw bad_alloc, just math.
36 subpar::parallelize_range<true>(num_workers, num_tasks, std::move(run_task_range));
37#else
38 POWERIT_CUSTOM_PARALLEL(num_workers, num_tasks, run_task_range);
39#endif
40}
41
59template<typename Data_, class Engine_>
60Result<Data_> compute(size_t order, const Data_* matrix, bool row_major, Data_* vector, Engine_& engine, const Options& opt) {
61 fill_starting_vector(order, vector, engine);
62 return compute(order, matrix, row_major, vector, opt);
63}
64
81template<typename Data_>
82Result<Data_> compute(size_t order, const Data_* matrix, bool row_major, Data_* vector, const Options& opt) {
83 if (row_major) {
84 return compute_core(order, [&](std::vector<Data_>& buffer, const Data_* vec) {
85 parallelize(opt.num_threads, order, [&](int, size_t start, size_t length) {
86 for (size_t j = start, end = start + length; j < end; ++j) {
87 // Note that j and order are already both 'size_t', so no need to cast to avoid overflow.
88 buffer[j] = std::inner_product(vec, vec + order, matrix + j * order, static_cast<Data_>(0.0));
89 }
90 });
91 }, vector, opt);
92
93 } else if (opt.num_threads == 1) {
94 // Dedicated path to avoid allocating a per-thread temporary.
95 return compute_core(order, [&](std::vector<Data_>& buffer, const Data_* vec) {
96 std::fill(buffer.begin(), buffer.end(), 0);
97 auto matcopy = matrix;
98 for (size_t j = 0; j < order; ++j) {
99 Data_ mult = vec[j];
100 for (size_t k = 0; k < order; ++k, ++matcopy) {
101 buffer[k] += mult * (*matcopy);
102 }
103 }
104 }, vector, opt);
105
106 } else {
107 // We make a separate buffer for each thread to avoid false sharing problems.
108 // We do the allocation outside so that (i) we can re-use memory, and
109 // (ii) the code inside the parallelize() cannot throw.
110 std::vector<std::vector<Data_> > temp_buffers(opt.num_threads);
111 for (int i = 0; i < opt.num_threads; ++i) {
112 temp_buffers[i].resize(order);
113 }
114
115 return compute_core(order, [&](std::vector<Data_>& buffer, const Data_* vec) {
116 parallelize(opt.num_threads, order, [&](int t, size_t start, size_t length) {
117 auto& tmp = temp_buffers[t];
118 std::fill_n(tmp.begin(), length, 0);
119
120 size_t offset = start; // already size_t's, no need to cast.
121 for (size_t j = 0; j < order; ++j, offset += order) {
122 auto mult = vec[j];
123 auto matcopy = matrix + offset;
124 for (size_t k = 0; k < length; ++k, ++matcopy) {
125 tmp[k] += mult * (*matcopy);
126 }
127 }
128
129 std::copy_n(tmp.begin(), length, buffer.begin() + start);
130 });
131 }, vector, opt);
132 }
133}
134
135}
136
137#endif
Core data structures and calculations.
Namespace for power iterations.
Definition core.hpp:15
Result< Data_ > compute(size_t order, const Data_ *matrix, bool row_major, Data_ *vector, Engine_ &engine, const Options &opt)
Definition simple.hpp:60
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range)
Definition simple.hpp:32
Result< Data_ > compute_core(size_t order, Multiply_ multiply, Data_ *vector, const Options &opt)
Definition core.hpp:131
void fill_starting_vector(size_t order, Data_ *vector, Engine_ &engine)
Definition core.hpp:93
void parallelize_range(int num_workers, Task_ num_tasks, Run_ run_task_range)
Options for compute().
Definition core.hpp:20
int num_threads
Definition core.hpp:40
Result of compute().
Definition core.hpp:70